Message ID | d20dc9939523fac490bc02e57d7836f680916a36.1718228494.git.nicolinc@nvidia.com |
---|---|
State | Handled Elsewhere |
Headers | show |
Series | Add Tegra241 (Grace) CMDQV Support (part 1/2) | expand |
On Wed, Jun 12, 2024 at 02:45:31PM -0700, Nicolin Chen wrote: > The CMDQV extension in NVIDIA Tegra241 SoC only supports CS_NONE in the > CS field of CMD_SYNC. Add a quirk flag to accommodate that. > > Reviewed-by: Jason Gunthorpe <jgg@nvidia.com> > Signed-off-by: Nicolin Chen <nicolinc@nvidia.com> > --- > drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 8 +++++++- > drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 3 +++ > 2 files changed, 10 insertions(+), 1 deletion(-) > > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > index c864c634cd23..ba0e24d5ffbf 100644 > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > @@ -345,6 +345,11 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu, > FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH) | > FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB); > > + if (q->quirks & CMDQ_QUIRK_SYNC_CS_NONE_ONLY) { > + cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_NONE); > + return; > + } > + > if (!(smmu->options & ARM_SMMU_OPT_MSIPOLL)) { > cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV); > return; > @@ -690,7 +695,8 @@ static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu, > struct arm_smmu_cmdq *cmdq, > struct arm_smmu_ll_queue *llq) > { > - if (smmu->options & ARM_SMMU_OPT_MSIPOLL) > + if (smmu->options & ARM_SMMU_OPT_MSIPOLL && > + !(cmdq->q.quirks & CMDQ_QUIRK_SYNC_CS_NONE_ONLY)) > return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq); > > return __arm_smmu_cmdq_poll_until_consumed(smmu, cmdq, llq); > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h > index 180c0b1e0658..01227c0de290 100644 > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h > @@ -543,6 +543,9 @@ struct arm_smmu_queue { > > u32 __iomem *prod_reg; > u32 __iomem *cons_reg; > + > +#define 
CMDQ_QUIRK_SYNC_CS_NONE_ONLY BIT(0) /* CMD_SYNC CS field supports CS_NONE only */ > + u32 quirks; Please can you use the existing smmu->options field instead of adding another place to track quirks? Or do you need this only for some of the queues for a given SMMU device? Thanks, Will
Hi Will, On Tue, Jul 02, 2024 at 06:43:07PM +0100, Will Deacon wrote: > On Wed, Jun 12, 2024 at 02:45:31PM -0700, Nicolin Chen wrote: > > The CMDQV extension in NVIDIA Tegra241 SoC only supports CS_NONE in the > > CS field of CMD_SYNC. Add a quirk flag to accommodate that. > > > > Reviewed-by: Jason Gunthorpe <jgg@nvidia.com> > > Signed-off-by: Nicolin Chen <nicolinc@nvidia.com> > > --- > > drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 8 +++++++- > > drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 3 +++ > > 2 files changed, 10 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > > index c864c634cd23..ba0e24d5ffbf 100644 > > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > > @@ -345,6 +345,11 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu, > > FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH) | > > FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB); > > > > + if (q->quirks & CMDQ_QUIRK_SYNC_CS_NONE_ONLY) { > > + cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_NONE); > > + return; > > + } > > + > > if (!(smmu->options & ARM_SMMU_OPT_MSIPOLL)) { > > cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV); > > return; > > @@ -690,7 +695,8 @@ static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu, > > struct arm_smmu_cmdq *cmdq, > > struct arm_smmu_ll_queue *llq) > > { > > - if (smmu->options & ARM_SMMU_OPT_MSIPOLL) > > + if (smmu->options & ARM_SMMU_OPT_MSIPOLL && > > + !(cmdq->q.quirks & CMDQ_QUIRK_SYNC_CS_NONE_ONLY)) > > return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq); > > > > return __arm_smmu_cmdq_poll_until_consumed(smmu, cmdq, llq); > > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h > > index 180c0b1e0658..01227c0de290 100644 > > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h > > +++ 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h > > @@ -543,6 +543,9 @@ struct arm_smmu_queue { > > > > u32 __iomem *prod_reg; > > u32 __iomem *cons_reg; > > + > > +#define CMDQ_QUIRK_SYNC_CS_NONE_ONLY BIT(0) /* CMD_SYNC CS field supports CS_NONE only */ > > + u32 quirks; > > Please can you use the existing smmu->options field instead of adding > another place to track quirks? Or do you need this only for some of the > queues for a given SMMU device? VCMDQs are extension of a regular SMMU (with its own CMDQ). So, SMMU CMDQ still supports SIG_IRQ for the CS field, while VCMDQs could only support SIG_NONE. In another word, this quirk is not per SMMU but per Queue. I can highlight this in the commit message, if that would make it clear. Thanks Nicolin
On Tue, Jul 02, 2024 at 11:19:56AM -0700, Nicolin Chen wrote: > Hi Will, > > On Tue, Jul 02, 2024 at 06:43:07PM +0100, Will Deacon wrote: > > On Wed, Jun 12, 2024 at 02:45:31PM -0700, Nicolin Chen wrote: > > > The CMDQV extension in NVIDIA Tegra241 SoC only supports CS_NONE in the > > > CS field of CMD_SYNC. Add a quirk flag to accommodate that. > > > > > > Reviewed-by: Jason Gunthorpe <jgg@nvidia.com> > > > Signed-off-by: Nicolin Chen <nicolinc@nvidia.com> > > > --- > > > drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 8 +++++++- > > > drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 3 +++ > > > 2 files changed, 10 insertions(+), 1 deletion(-) > > > > > > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > > > index c864c634cd23..ba0e24d5ffbf 100644 > > > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > > > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > > > @@ -345,6 +345,11 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu, > > > FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH) | > > > FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB); > > > > > > + if (q->quirks & CMDQ_QUIRK_SYNC_CS_NONE_ONLY) { > > > + cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_NONE); > > > + return; > > > + } > > > + > > > if (!(smmu->options & ARM_SMMU_OPT_MSIPOLL)) { > > > cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV); > > > return; > > > @@ -690,7 +695,8 @@ static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu, > > > struct arm_smmu_cmdq *cmdq, > > > struct arm_smmu_ll_queue *llq) > > > { > > > - if (smmu->options & ARM_SMMU_OPT_MSIPOLL) > > > + if (smmu->options & ARM_SMMU_OPT_MSIPOLL && > > > + !(cmdq->q.quirks & CMDQ_QUIRK_SYNC_CS_NONE_ONLY)) > > > return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq); > > > > > > return __arm_smmu_cmdq_poll_until_consumed(smmu, cmdq, llq); > > > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h > > > index 180c0b1e0658..01227c0de290 100644 > > > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h > > > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h > > > @@ -543,6 +543,9 @@ struct arm_smmu_queue { > > > > > > u32 __iomem *prod_reg; > > > u32 __iomem *cons_reg; > > > + > > > +#define CMDQ_QUIRK_SYNC_CS_NONE_ONLY BIT(0) /* CMD_SYNC CS field supports CS_NONE only */ > > > + u32 quirks; > > > > Please can you use the existing smmu->options field instead of adding > > another place to track quirks? Or do you need this only for some of the > > queues for a given SMMU device? > > VCMDQs are extension of a regular SMMU (with its own CMDQ). So, > SMMU CMDQ still supports SIG_IRQ for the CS field, while VCMDQs > could only support SIG_NONE. In another word, this quirk is not > per SMMU but per Queue. > > I can highlight this in the commit message, if that would make > it clear. I think we could still use smmu->options and have something like ARM_SMMU_OPT_SECONDARY_CMDQ_CS_NONE_ONLY which could be applied when the queue is != arm_smmu_get_cmdq(smmu). Will
On Tue, Jul 02, 2024 at 07:49:42PM +0100, Will Deacon wrote: > On Tue, Jul 02, 2024 at 11:19:56AM -0700, Nicolin Chen wrote: > > Hi Will, > > > > On Tue, Jul 02, 2024 at 06:43:07PM +0100, Will Deacon wrote: > > > On Wed, Jun 12, 2024 at 02:45:31PM -0700, Nicolin Chen wrote: > > > > The CMDQV extension in NVIDIA Tegra241 SoC only supports CS_NONE in the > > > > CS field of CMD_SYNC. Add a quirk flag to accommodate that. > > > > > > > > Reviewed-by: Jason Gunthorpe <jgg@nvidia.com> > > > > Signed-off-by: Nicolin Chen <nicolinc@nvidia.com> > > > > --- > > > > drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 8 +++++++- > > > > drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 3 +++ > > > > 2 files changed, 10 insertions(+), 1 deletion(-) > > > > > > > > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > > > > index c864c634cd23..ba0e24d5ffbf 100644 > > > > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > > > > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > > > > @@ -345,6 +345,11 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu, > > > > FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH) | > > > > FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB); > > > > > > > > + if (q->quirks & CMDQ_QUIRK_SYNC_CS_NONE_ONLY) { > > > > + cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_NONE); > > > > + return; > > > > + } > > > > + > > > > if (!(smmu->options & ARM_SMMU_OPT_MSIPOLL)) { > > > > cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV); > > > > return; > > > > @@ -690,7 +695,8 @@ static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu, > > > > struct arm_smmu_cmdq *cmdq, > > > > struct arm_smmu_ll_queue *llq) > > > > { > > > > - if (smmu->options & ARM_SMMU_OPT_MSIPOLL) > > > > + if (smmu->options & ARM_SMMU_OPT_MSIPOLL && > > > > + !(cmdq->q.quirks & CMDQ_QUIRK_SYNC_CS_NONE_ONLY)) > > > > return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq); > > > > > > > > return 
__arm_smmu_cmdq_poll_until_consumed(smmu, cmdq, llq); > > > > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h > > > > index 180c0b1e0658..01227c0de290 100644 > > > > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h > > > > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h > > > > @@ -543,6 +543,9 @@ struct arm_smmu_queue { > > > > > > > > u32 __iomem *prod_reg; > > > > u32 __iomem *cons_reg; > > > > + > > > > +#define CMDQ_QUIRK_SYNC_CS_NONE_ONLY BIT(0) /* CMD_SYNC CS field supports CS_NONE only */ > > > > + u32 quirks; > > > > > > Please can you use the existing smmu->options field instead of adding > > > another place to track quirks? Or do you need this only for some of the > > > queues for a given SMMU device? > > > > VCMDQs are extension of a regular SMMU (with its own CMDQ). So, > > SMMU CMDQ still supports SIG_IRQ for the CS field, while VCMDQs > > could only support SIG_NONE. In another word, this quirk is not > > per SMMU but per Queue. > > > > I can highlight this in the commit message, if that would make > > it clear. > > I think we could still use smmu->options and have something like > ARM_SMMU_OPT_SECONDARY_CMDQ_CS_NONE_ONLY which could be applied > when the queue is != arm_smmu_get_cmdq(smmu). A queue can be cmdq, ecmdq, vcmdq. Only VCMDQ has such a quirk. So arm_smmu_get_cmdq(smmu) is unlikely going to work if we add ECMDQ later. Also, ARM_SMMU_OPT_SECONDARY_CMDQ_CS_NONE_ONLY is very ambiguous IMHO. 
What we need is to check clearly if VCMDQ is being used, so that leaves us an alternative: -------------------------------------------------------------- enum arm_smmu_cmdq_type { ARM_SMMU_CMDQ, ARM_SMMU_ECMDQ, TEGRA241_VCMDQ, }; @@ -543,6 +543,9 @@ struct arm_smmu_queue { u32 __iomem *prod_reg; u32 __iomem *cons_reg; + + enum arm_smmu_cmdq_type type; }; struct arm_smmu_queue_poll { @@ -345,6 +345,11 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu, FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH) | FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB); + if (cmdq->type == TEGRA241_VCMDQ) { + cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_NONE); + return; + } + if (!(smmu->options & ARM_SMMU_OPT_MSIPOLL)) { cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV); return; @@ -690,7 +695,8 @@ static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu, struct arm_smmu_cmdq *cmdq, struct arm_smmu_ll_queue *llq) { - if (smmu->options & ARM_SMMU_OPT_MSIPOLL) + if (smmu->options & ARM_SMMU_OPT_MSIPOLL && + cmdq->type != TEGRA241_VCMDQ) return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq); -------------------------------------------------------------- Would you prefer this one? I feel CMDQ_QUIRK_SYNC_CS_NONE_ONLY is more general looking though.. Thanks Nicolin
On Tue, Jul 02, 2024 at 12:47:14PM -0700, Nicolin Chen wrote: > @@ -345,6 +345,11 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu, > FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH) | > FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB); > > + if (cmdq->type == TEGRA241_VCMDQ) { > + cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_NONE); > + return; > + } > + > if (!(smmu->options & ARM_SMMU_OPT_MSIPOLL)) { > cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV); > return; > @@ -690,7 +695,8 @@ static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu, > struct arm_smmu_cmdq *cmdq, > struct arm_smmu_ll_queue *llq) > { > - if (smmu->options & ARM_SMMU_OPT_MSIPOLL) > + if (smmu->options & ARM_SMMU_OPT_MSIPOLL && > + cmdq->type != TEGRA241_VCMDQ) { > return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq); > > -------------------------------------------------------------- > > Would you prefer this one? I feel CMDQ_QUIRK_SYNC_CS_NONE_ONLY > is more general looking though.. And we would need some additional lines of comments for the two pieces above, explaining why TEGRA241_VCMDQ type needs the first one while bypasses the second one. Again, it feels even worse :( Thanks Nicolin
On Tue, Jul 02, 2024 at 01:10:19PM -0700, Nicolin Chen wrote: > On Tue, Jul 02, 2024 at 12:47:14PM -0700, Nicolin Chen wrote: > > @@ -345,6 +345,11 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu, > > FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH) | > > FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB); > > > > + if (cmdq->type == TEGRA241_VCMDQ) { > > + cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_NONE); > > + return; > > + } > > + > > if (!(smmu->options & ARM_SMMU_OPT_MSIPOLL)) { > > cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV); > > return; > > @@ -690,7 +695,8 @@ static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu, > > struct arm_smmu_cmdq *cmdq, > > struct arm_smmu_ll_queue *llq) > > { > > - if (smmu->options & ARM_SMMU_OPT_MSIPOLL) > > + if (smmu->options & ARM_SMMU_OPT_MSIPOLL && > > + cmdq->type != TEGRA241_VCMDQ) { > > return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq); > > > > -------------------------------------------------------------- > > > > Would you prefer this one? I feel CMDQ_QUIRK_SYNC_CS_NONE_ONLY > > is more general looking though.. > > And we would need some additional lines of comments for the two > pieces above, explaining why TEGRA241_VCMDQ type needs the first > one while bypasses the second one. Again, it feels even worse :( I hacked the code around a bit this afternoon. Please can you see if: https://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git/log/?h=for-nicolin/grace-vcmdq-wip does roughly what you need? Will
Hi Will, On Fri, Jul 05, 2024 at 04:27:21PM +0100, Will Deacon wrote: > On Tue, Jul 02, 2024 at 01:10:19PM -0700, Nicolin Chen wrote: > > On Tue, Jul 02, 2024 at 12:47:14PM -0700, Nicolin Chen wrote: > > > @@ -345,6 +345,11 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu, > > > FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH) | > > > FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB); > > > > > > + if (cmdq->type == TEGRA241_VCMDQ) { > > > + cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_NONE); > > > + return; > > > + } > > > + > > > if (!(smmu->options & ARM_SMMU_OPT_MSIPOLL)) { > > > cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV); > > > return; > > > @@ -690,7 +695,8 @@ static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu, > > > struct arm_smmu_cmdq *cmdq, > > > struct arm_smmu_ll_queue *llq) > > > { > > > - if (smmu->options & ARM_SMMU_OPT_MSIPOLL) > > > + if (smmu->options & ARM_SMMU_OPT_MSIPOLL && > > > + cmdq->type != TEGRA241_VCMDQ) { > > > return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq); > > > > > > -------------------------------------------------------------- > > > > > > Would you prefer this one? I feel CMDQ_QUIRK_SYNC_CS_NONE_ONLY > > > is more general looking though.. > > > > And we would need some additional lines of comments for the two > > pieces above, explaining why TEGRA241_VCMDQ type needs the first > > one while bypasses the second one. Again, it feels even worse :( > > I hacked the code around a bit this afternoon. Please can you see if: > > https://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git/log/?h=for-nicolin/grace-vcmdq-wip > > does roughly what you need? I appreciate the patch. Yet, we cannot use IORT's model field. This would need to go through IORT documentation, for A. And B, we had a very long discussion with ARM (Robin was there) years ago, and concluded that this CMDQV would not be a model in IORT but a DSDT node as an extension. So, this is firm... 
With that, we cannot avoid an unconditional hard-coding tegra function call even if we switch to an impl design: +static int acpi_smmu_impl_init(u32 model, struct arm_smmu_device *smmu) +{ + /* + * unconditional go through ACPI table to detect if there is a tegra241 + * implementation that extends SMMU with a CMDQV. The probe() will fill + * the smmu->impl pointer upon success. Otherwise, fall back to regular + * SMMU CMDQ. + */ + tegra241_impl_acpi_probe(smmu); + return 0; +} As for arm_smmu_cmdq_needs_busy_polling, it doesn't really look very optimal to me. But if you insist on having an smmu option, we still have to take in the PATCH-3 in this series, enforcing an arm_smmu_cmdq_build_sync_cmd() call in the IRQ handler too. So, it would eventually look like [attachment]. Thanks! Nicolin diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 3e2eb88535de..e57ea8d39c98 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -352,15 +352,26 @@ arm_smmu_get_cmdq(struct arm_smmu_device *smmu, u8 opcode) return &smmu->cmdq; } +static bool arm_smmu_cmdq_needs_busy_polling(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq) +{ + if (cmdq == &smmu->cmdq) + return false; + + return smmu->options & ARM_SMMU_OPT_SECONDARY_CMDQ_CS_NONE_ONLY; +} + static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu, - struct arm_smmu_queue *q, u32 prod) + struct arm_smmu_cmdq *cmdq, u32 prod) { + struct arm_smmu_queue *q = &cmdq->q; + cmd[1] = 0; cmd[0] = FIELD_PREP(CMDQ_0_OP, CMDQ_OP_CMD_SYNC) | FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH) | FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB); - if (q->quirks & CMDQ_QUIRK_SYNC_CS_NONE_ONLY) { + if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq)) { cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_NONE); return; } @@ -380,7 +391,7 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct 
arm_smmu_device *smmu, } void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu, - struct arm_smmu_queue *q) + struct arm_smmu_cmdq *cmdq) { static const char * const cerror_str[] = { [CMDQ_ERR_CERROR_NONE_IDX] = "No error", @@ -388,6 +399,7 @@ void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu, [CMDQ_ERR_CERROR_ABT_IDX] = "Abort on command fetch", [CMDQ_ERR_CERROR_ATC_INV_IDX] = "ATC invalidate timeout", }; + struct arm_smmu_queue *q = &cmdq->q; int i; u64 cmd[CMDQ_ENT_DWORDS]; @@ -426,14 +438,14 @@ void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu, dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]); /* Convert the erroneous command into a CMD_SYNC */ - arm_smmu_cmdq_build_sync_cmd(cmd, smmu, q, cons); + arm_smmu_cmdq_build_sync_cmd(cmd, smmu, cmdq, cons); queue_write(Q_ENT(q, cons), cmd, q->ent_dwords); } static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu) { - __arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q); + __arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq); } /* @@ -711,7 +723,7 @@ static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu, struct arm_smmu_ll_queue *llq) { if (smmu->options & ARM_SMMU_OPT_MSIPOLL && - !(cmdq->q.quirks & CMDQ_QUIRK_SYNC_CS_NONE_ONLY)) + !arm_smmu_cmdq_needs_busy_polling(smmu, cmdq)) return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq); return __arm_smmu_cmdq_poll_until_consumed(smmu, cmdq, llq); @@ -797,7 +809,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu, arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n); if (sync) { prod = queue_inc_prod_n(&llq, n); - arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod); + arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, cmdq, prod); queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS); /* @@ -3985,6 +3997,8 @@ static int arm_smmu_device_acpi_probe(struct platform_device *pdev, smmu->features |= ARM_SMMU_FEAT_COHERENCY; smmu->tegra241_cmdqv = tegra241_cmdqv_acpi_probe(smmu, node); + if 
(smmu->tegra241_cmdqv) + smmu->options |= ARM_SMMU_OPT_SECONDARY_CMDQ_CS_NONE_ONLY; return 0; } diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 2c1fe7e129cd..0962aa839080 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -654,10 +654,11 @@ struct arm_smmu_device { #define ARM_SMMU_FEAT_ATTR_TYPES_OVR (1 << 20) u32 features; -#define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) -#define ARM_SMMU_OPT_PAGE0_REGS_ONLY (1 << 1) -#define ARM_SMMU_OPT_MSIPOLL (1 << 2) -#define ARM_SMMU_OPT_CMDQ_FORCE_SYNC (1 << 3) +#define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) +#define ARM_SMMU_OPT_PAGE0_REGS_ONLY (1 << 1) +#define ARM_SMMU_OPT_MSIPOLL (1 << 2) +#define ARM_SMMU_OPT_CMDQ_FORCE_SYNC (1 << 3) +#define ARM_SMMU_OPT_SECONDARY_CMDQ_CS_NONE_ONLY (1 << 4) u32 options; struct arm_smmu_cmdq cmdq; @@ -805,7 +806,7 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid, unsigned long iova, size_t size); void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu, - struct arm_smmu_queue *q); + struct arm_smmu_cmdq *cmdq); int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, struct arm_smmu_queue *q, void __iomem *page, unsigned long prod_off, unsigned long cons_off, diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c index bb696c66e56d..4b1de8517bec 100644 --- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c @@ -266,7 +266,7 @@ static void tegra241_vintf0_handle_error(struct tegra241_vintf *vintf) u32 gerror = readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERROR)); __arm_smmu_cmdq_skip_err(vintf->cmdqv->smmu, - &vcmdq->cmdq.q); + &vcmdq->cmdq); writel(gerror, REG_VCMDQ_PAGE0(vcmdq, GERRORN)); map &= ~BIT_ULL(lidx); }
On Fri, Jul 05, 2024 at 11:10:47AM -0700, Nicolin Chen wrote: > Hi Will, > > On Fri, Jul 05, 2024 at 04:27:21PM +0100, Will Deacon wrote: > > On Tue, Jul 02, 2024 at 01:10:19PM -0700, Nicolin Chen wrote: > > > On Tue, Jul 02, 2024 at 12:47:14PM -0700, Nicolin Chen wrote: > > > > @@ -345,6 +345,11 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu, > > > > FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH) | > > > > FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB); > > > > > > > > + if (cmdq->type == TEGRA241_VCMDQ) { > > > > + cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_NONE); > > > > + return; > > > > + } > > > > + > > > > if (!(smmu->options & ARM_SMMU_OPT_MSIPOLL)) { > > > > cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV); > > > > return; > > > > @@ -690,7 +695,8 @@ static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu, > > > > struct arm_smmu_cmdq *cmdq, > > > > struct arm_smmu_ll_queue *llq) > > > > { > > > > - if (smmu->options & ARM_SMMU_OPT_MSIPOLL) > > > > + if (smmu->options & ARM_SMMU_OPT_MSIPOLL && > > > > + cmdq->type != TEGRA241_VCMDQ) { > > > > return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq); > > > > > > > > -------------------------------------------------------------- > > > > > > > > Would you prefer this one? I feel CMDQ_QUIRK_SYNC_CS_NONE_ONLY > > > > is more general looking though.. > > > > > > And we would need some additional lines of comments for the two > > > pieces above, explaining why TEGRA241_VCMDQ type needs the first > > > one while bypasses the second one. Again, it feels even worse :( > > > > I hacked the code around a bit this afternoon. Please can you see if: > > > > https://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git/log/?h=for-nicolin/grace-vcmdq-wip > > > > does roughly what you need? > > I appreciate the patch. Yet, we cannot use IORT's model field. > This would need to go through IORT documentation, for A. 
And B, > we had a very long discussion with ARM (Robin was there) years > ago, and concluded that this CMDQV would not be a model in IORT > but a DSDT node as an extension. So, this is firm... > > With that, we cannot avoid an unconditional hard-coding tegra > function call even if we switch to an impl design: > > +static int acpi_smmu_impl_init(u32 model, struct arm_smmu_device *smmu) > +{ > + /* > + * unconditional go through ACPI table to detect if there is a tegra241 > + * implementation that extends SMMU with a CMDQV. The probe() will fill > + * the smmu->impl pointer upon success. Otherwise, fall back to regular > + * SMMU CMDQ. > + */ > + tegra241_impl_acpi_probe(smmu); > + return 0; > +} > > As for arm_smmu_cmdq_needs_busy_polling, it doesn't really look > very optimal to me. But if you insist on having an smmu option, > we still have to take in the PATCH-3 in this series, enforcing > an arm_smmu_cmdq_build_sync_cmd() call in the IRQ handler too. > So, it would eventually look like [attachment]. > > Please ignore the attachment. Since we are adding arm_smmu_impl, > I figure that we could add an arm_smmu_cmdq_impl too. There's > another small feature that I didn't implement in this v9, while > being able to benefit from a cmdq impl now. > > The impl can also hold a boolean busy_polling, so we won't need > a global smmu option. So /that/ might be overkill. Architectural queues can use polling, so I don't mind having that option in the driver and it should keep the number of impl hooks to a minimum. > I will send a new version asap, though I am not sure if we can > still make it to this cycle that we hoped for :-/ I'm in fixes-only mode at this point, especially since we've not had a linux-next for a while. Will
On Fri, Jul 05, 2024 at 11:10:42AM -0700, Nicolin Chen wrote: > On Fri, Jul 05, 2024 at 04:27:21PM +0100, Will Deacon wrote: > > On Tue, Jul 02, 2024 at 01:10:19PM -0700, Nicolin Chen wrote: > > > On Tue, Jul 02, 2024 at 12:47:14PM -0700, Nicolin Chen wrote: > > > > @@ -345,6 +345,11 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu, > > > > FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH) | > > > > FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB); > > > > > > > > + if (cmdq->type == TEGRA241_VCMDQ) { > > > > + cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_NONE); > > > > + return; > > > > + } > > > > + > > > > if (!(smmu->options & ARM_SMMU_OPT_MSIPOLL)) { > > > > cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV); > > > > return; > > > > @@ -690,7 +695,8 @@ static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu, > > > > struct arm_smmu_cmdq *cmdq, > > > > struct arm_smmu_ll_queue *llq) > > > > { > > > > - if (smmu->options & ARM_SMMU_OPT_MSIPOLL) > > > > + if (smmu->options & ARM_SMMU_OPT_MSIPOLL && > > > > + cmdq->type != TEGRA241_VCMDQ) { > > > > return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq); > > > > > > > > -------------------------------------------------------------- > > > > > > > > Would you prefer this one? I feel CMDQ_QUIRK_SYNC_CS_NONE_ONLY > > > > is more general looking though.. > > > > > > And we would need some additional lines of comments for the two > > > pieces above, explaining why TEGRA241_VCMDQ type needs the first > > > one while bypasses the second one. Again, it feels even worse :( > > > > I hacked the code around a bit this afternoon. Please can you see if: > > > > https://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git/log/?h=for-nicolin/grace-vcmdq-wip > > > > does roughly what you need? > > I appreciate the patch. Yet, we cannot use IORT's model field. > This would need to go through IORT documentation, for A. 
And B, > we had a very long discussion with ARM (Robin was there) years > ago, and concluded that this CMDQV would not be a model in IORT > but a DSDT node as an extension. So, this is firm... Seems like a bad outcome given that you've clearly modified the IP, but whatever. We can parse the DSDT when we detect whatever the model actually is; I don't think that's a huge issue. > With that, we cannot avoid an unconditional hard-coding tegra > function call even if we switch to an impl design: > > +static int acpi_smmu_impl_init(u32 model, struct arm_smmu_device *smmu) > +{ > + /* > + * unconditional go through ACPI table to detect if there is a tegra241 > + * implementation that extends SMMU with a CMDQV. The probe() will fill > + * the smmu->impl pointer upon success. Otherwise, fall back to regular > + * SMMU CMDQ. > + */ > + tegra241_impl_acpi_probe(smmu); In-line the minimal DSDT parsing to figure out if we're on a Tegra part. If it's that bad, put it in a static inline in arm-smmu-v3.h. > + return 0; > +} > > As for arm_smmu_cmdq_needs_busy_polling, it doesn't really look > very optimal to me. "optimal" in what sense? In that you don't like how it smells, or that it's measurably bad? > But if you insist on having an smmu option, we still have to take in the > PATCH-3 in this series, enforcing an arm_smmu_cmdq_build_sync_cmd() call > in the IRQ handler too. So, it would eventually look like [attachment]. With my hacks, I think you can just call arm_smmu_cmdq_build_sync_cmd() from the irqhandler and it will work. Will
On Fri, Jul 05, 2024 at 05:32:24PM -0700, Nicolin Chen wrote: > On Fri, Jul 05, 2024 at 11:10:47AM -0700, Nicolin Chen wrote: > > On Fri, Jul 05, 2024 at 04:27:21PM +0100, Will Deacon wrote: > > > On Tue, Jul 02, 2024 at 01:10:19PM -0700, Nicolin Chen wrote: > > > > On Tue, Jul 02, 2024 at 12:47:14PM -0700, Nicolin Chen wrote: > > > > > @@ -345,6 +345,11 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu, > > > > > FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH) | > > > > > FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB); > > > > > > > > > > + if (cmdq->type == TEGRA241_VCMDQ) { > > > > > + cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_NONE); > > > > > + return; > > > > > + } > > > > > + > > > > > if (!(smmu->options & ARM_SMMU_OPT_MSIPOLL)) { > > > > > cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV); > > > > > return; > > > > > @@ -690,7 +695,8 @@ static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu, > > > > > struct arm_smmu_cmdq *cmdq, > > > > > struct arm_smmu_ll_queue *llq) > > > > > { > > > > > - if (smmu->options & ARM_SMMU_OPT_MSIPOLL) > > > > > + if (smmu->options & ARM_SMMU_OPT_MSIPOLL && > > > > > + cmdq->type != TEGRA241_VCMDQ) { > > > > > return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq); > > > > > > > > > > -------------------------------------------------------------- > > > > > > > > > > Would you prefer this one? I feel CMDQ_QUIRK_SYNC_CS_NONE_ONLY > > > > > is more general looking though.. > > > > > > > > And we would need some additional lines of comments for the two > > > > pieces above, explaining why TEGRA241_VCMDQ type needs the first > > > > one while bypasses the second one. Again, it feels even worse :( > > > > > > I hacked the code around a bit this afternoon. Please can you see if: > > > > > > https://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git/log/?h=for-nicolin/grace-vcmdq-wip > > > > > > does roughly what you need? > > > > I appreciate the patch. 
Yet, we cannot use IORT's model field. > > This would need to go through IORT documentation, for A. And B, > > we had a very long discussion with ARM (Robin was there) years > > ago, and concluded that this CMDQV would not be a model in IORT > > but a DSDT node as an extension. So, this is firm... > > > > With that, we cannot avoid an unconditional hard-coding tegra > > function call even if we switch to an impl design: > > > > +static int acpi_smmu_impl_init(u32 model, struct arm_smmu_device *smmu) > > +{ > > + /* > > + * unconditional go through ACPI table to detect if there is a tegra241 > > + * implementation that extends SMMU with a CMDQV. The probe() will fill > > + * the smmu->impl pointer upon success. Otherwise, fall back to regular > > + * SMMU CMDQ. > > + */ > > + tegra241_impl_acpi_probe(smmu); > > + return 0; > > +} > > > > As for arm_smmu_cmdq_needs_busy_polling, it doesn't really look > > very optimal to me. But if you insist on having an smmu option, > > we still have to take in the PATCH-3 in this series, enforcing > > an arm_smmu_cmdq_build_sync_cmd() call in the IRQ handler too. > > So, it would eventually look like [attachment]. > > Please ignore the attachment. Since we are adding arm_smmu_impl, > I figure that we could add an arm_smmu_cmdq_impl too. There's an > another small feature that I didn't implement in this v9, while > being able to benefit from a cmdq impl now. > > The impl can also hold a boolean busy_polling, so we won't need > a global smmu option. So /that/ might be overkill. Architectural queues can use polling, so I don't mind having that option in the driver and it should keep the number of impl hooks to a minimum. > I will send a new version asap, though I am not sure if we can > still make it to this cycle that we hoped for :-/ I'm in fixes-only mode at this point, especially since we've not had a linux-next for a while. Will
On Mon, Jul 08, 2024 at 12:29:28PM +0100, Will Deacon wrote: > On Fri, Jul 05, 2024 at 11:10:42AM -0700, Nicolin Chen wrote: > > But if you insist on having an smmu option, we still have to take in the > > PATCH-3 in this series, enforcing an arm_smmu_cmdq_build_sync_cmd() call > > in the IRQ handler too. So, it would eventually look like [attachment]. > > With my hacks, I think you can just call arm_smmu_cmdq_build_sync_cmd() > from the irqhandler and it will work. Hmm, actually, that will mean we end up using MSIs for the error case on hardware which supports it, which is a strange change in behaviour. What does your hardware do if it sees SIG_SEV in a CMD_SYNC? Is it just a case of failing to generate the event on completion, or does it treat it as an invalid opcode? Will
On Mon, Jul 08, 2024 at 12:29:28PM +0100, Will Deacon wrote: > > With that, we cannot avoid an unconditional hard-coding tegra > > function call even if we switch to an impl design: > > > > +static int acpi_smmu_impl_init(u32 model, struct arm_smmu_device *smmu) > > +{ > > + /* > > + * unconditional go through ACPI table to detect if there is a tegra241 > > + * implementation that extends SMMU with a CMDQV. The probe() will fill > > + * the smmu->impl pointer upon success. Otherwise, fall back to regular > > + * SMMU CMDQ. > > + */ > > + tegra241_impl_acpi_probe(smmu); > > In-line the minimal DSDT parsing to figure out if we're on a Tegra part. > If it's that bad, put it in a static inline in arm-smmu-v3.h. OK. How about the following? /* arm-smmu-v3.h */ static inline void arm_smmu_impl_acpi_dsdt_probe(struct arm_smmu_device *smmu, struct acpi_iort_node *node) { tegra241_cmdqv_acpi_dsdt_probe(smmu, node); } /* arm-smmu-v3.c */ static int arm_smmu_impl_acpi_probe(struct arm_smmu_device *smmu, struct acpi_iort_node *node) { /* * DSDT might hold some SMMU extension, so we have no option but to go * through ACPI tables unconditionally. This probe function should fill * the smmu->impl pointer upon success. Otherwise, just carry on with a * standard SMMU. > > + return 0; > > +} > > > > As for arm_smmu_cmdq_needs_busy_polling, it doesn't really look > > very optimal to me. > > "optimal" in what sense? In that you don't like how it smells, or that > it's measurably bad? It would potentially not work if someday an implementation has two secondary queues? I got your point of making it an option just like the existing ARM_SMMU_OPT_MSIPOLL though.. Thanks Nicolin
On Mon, Jul 08, 2024 at 12:31:15PM +0100, Will Deacon wrote: > > > As for arm_smmu_cmdq_needs_busy_polling, it doesn't really look > > > very optimal to me. But if you insist on having an smmu option, > > > we still have to take in the PATCH-3 in this series, enforcing > > > an arm_smmu_cmdq_build_sync_cmd() call in the IRQ handler too. > > > So, it would eventually look like [attachment]. > > > > Please ignore the attachment. Since we are adding arm_smmu_impl, > > I figure that we could add an arm_smmu_cmdq_impl too. There's an > > another small feature that I didn't implement in this v9, while > > being able to benefit from a cmdq impl now. > > > > The impl can also hold a boolean busy_polling, so we won't need > > a global smmu option. > > So /that/ might be overkill. Architectural queues can use polling, so I > don't mind having that option in the driver and it should keep the number > of impl hooks to a minimum. OK. Let's make an option as you suggested. > > I will send a new version asap, though I am not sure if we can > > still make it to this cycle that we hoped for :-/ > > I'm in fixes-only mode at this point, especially since we've not had a > linux-next for a while. Sad that we missed again. Thanks for letting me know that.. Nicolin
On Mon, Jul 08, 2024 at 12:43:26PM +0100, Will Deacon wrote: > External email: Use caution opening links or attachments > > > On Mon, Jul 08, 2024 at 12:29:28PM +0100, Will Deacon wrote: > > On Fri, Jul 05, 2024 at 11:10:42AM -0700, Nicolin Chen wrote: > > > But if you insist on having an smmu option, we still have to take in the > > > PATCH-3 in this series, enforcing an arm_smmu_cmdq_build_sync_cmd() call > > > in the IRQ handler too. So, it would eventually look like [attachment]. > > > > With my hacks, I think you can just call arm_smmu_cmdq_build_sync_cmd() > > from the irqhandler and it will work. > > Hmm, actually, that will mean we end up using MSIs for the error case on > hardware which supports it, which is a strange change in behaviour. Yes. I highlighted the same in the commit log of PATCH-3: iommu/arm-smmu-v3: Enforce arm_smmu_cmdq_build_sync_cmd Do you foresee some potential risk of doing that? > What does your hardware do if it sees SIG_SEV in a CMD_SYNC? Is it just > a case of failing to generate the event on completion, or does it treat > it as an invalid opcode? That would be an invalid opcode. Thanks Nicolin
Hi Will, On Mon, Jul 08, 2024 at 11:00:00AM -0700, Nicolin Chen wrote: > On Mon, Jul 08, 2024 at 12:29:28PM +0100, Will Deacon wrote: > > > With that, we cannot avoid an unconditional hard-coding tegra > > > function call even if we switch to an impl design: > > > > > > +static int acpi_smmu_impl_init(u32 model, struct arm_smmu_device *smmu) > > > +{ > > > + /* > > > + * unconditional go through ACPI table to detect if there is a tegra241 > > > + * implementation that extends SMMU with a CMDQV. The probe() will fill > > > + * the smmu->impl pointer upon success. Otherwise, fall back to regular > > > + * SMMU CMDQ. > > > + */ > > > + tegra241_impl_acpi_probe(smmu); > > > > In-line the minimal DSDT parsing to figure out if we're on a Tegra part. > > If it's that bad, put it in a static inline in arm-smmu-v3.h. > > OK. How about the following? > > /* arm-smmu-v3.h */ > static inline void arm_smmu_impl_acpi_dsdt_probe(struct arm_smmu_device *smmu, > struct acpi_iort_node *node) > { > tegra241_cmdqv_acpi_dsdt_probe(smmu, node); > } > > /* arm-smmu-v3.c */ > static int arm_smmu_impl_acpi_probe(struct arm_smmu_device *smmu, > struct acpi_iort_node *node) > { > /* > * DSDT might holds some SMMU extension, so we have no option but to go > * through ACPI tables unconditionally. This probe function should fill > * the smmu->impl pointer upon success. Otherwise, just carry on with a > * standard SMMU. 
> */ > arm_smmu_impl_acpi_dsdt_probe(smmu, node); > > return 0; > } I have reworked my series and it looks like: ------------------------------------------------------------- @ -627,9 +630,35 @@ struct arm_smmu_strtab_cfg { u32 strtab_base_cfg; }; +struct arm_smmu_impl { + int (*device_reset)(struct arm_smmu_device *smmu); + void (*device_remove)(struct arm_smmu_device *smmu); + struct arm_smmu_cmdq *(*get_secondary_cmdq)(struct arm_smmu_device *smmu, + u8 opcode); +}; + +#ifdef CONFIG_TEGRA241_CMDQV +struct arm_smmu_device * +tegra241_cmdqv_acpi_dsdt_probe(struct arm_smmu_device *smmu, + struct acpi_iort_node *node); +#endif + +static inline struct arm_smmu_device * +arm_smmu_impl_acpi_dsdt_probe(struct arm_smmu_device *smmu, + struct acpi_iort_node *node) +{ +#ifdef CONFIG_TEGRA241_CMDQV + smmu = tegra241_cmdqv_acpi_dsdt_probe(smmu, node); +#endif + return smmu; +} + /* An SMMUv3 instance */ struct arm_smmu_device { struct device *dev; + /* An SMMUv3 implementation */ + const struct arm_smmu_impl *impl; + void __iomem *base; void __iomem *page1; ------------------------------------------------------------- One thing that I want to confirm is about the smmu pointer. I implemented in the way that SMMUv2 driver does, i.e. the passed-in SMMU pointer gets devm_realloc() to &cmdev->smmu. Is it something you would prefer? Thanks Nicolin
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index c864c634cd23..ba0e24d5ffbf 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -345,6 +345,11 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu, FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH) | FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB); + if (q->quirks & CMDQ_QUIRK_SYNC_CS_NONE_ONLY) { + cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_NONE); + return; + } + if (!(smmu->options & ARM_SMMU_OPT_MSIPOLL)) { cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV); return; @@ -690,7 +695,8 @@ static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu, struct arm_smmu_cmdq *cmdq, struct arm_smmu_ll_queue *llq) { - if (smmu->options & ARM_SMMU_OPT_MSIPOLL) + if (smmu->options & ARM_SMMU_OPT_MSIPOLL && + !(cmdq->q.quirks & CMDQ_QUIRK_SYNC_CS_NONE_ONLY)) return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq); return __arm_smmu_cmdq_poll_until_consumed(smmu, cmdq, llq); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 180c0b1e0658..01227c0de290 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -543,6 +543,9 @@ struct arm_smmu_queue { u32 __iomem *prod_reg; u32 __iomem *cons_reg; + +#define CMDQ_QUIRK_SYNC_CS_NONE_ONLY BIT(0) /* CMD_SYNC CS field supports CS_NONE only */ + u32 quirks; }; struct arm_smmu_queue_poll {