diff mbox series

[v6,2/6] iommu/arm-smmu-v3: Add CS_NONE quirk

Message ID 81d79f51c69604a38ea4f72c8ac2c573c52e8609.1714451595.git.nicolinc@nvidia.com
State Superseded
Headers show
Series Add Tegra241 (Grace) CMDQV Support (part 1/2) | expand

Commit Message

Nicolin Chen April 30, 2024, 4:43 a.m. UTC
The CMDQV extension in NVIDIA Tegra241 SoC only supports CS_NONE in the
CS field of CMD_SYNC. Add a quirk flag to accommodate that.

Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 10 ++++++++--
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  4 ++++
 2 files changed, 12 insertions(+), 2 deletions(-)

Comments

Jason Gunthorpe April 30, 2024, 2:22 p.m. UTC | #1
On Mon, Apr 29, 2024 at 09:43:45PM -0700, Nicolin Chen wrote:
> The CMDQV extension in NVIDIA Tegra241 SoC only supports CS_NONE in the
> CS field of CMD_SYNC. Add a quirk flag to accommodate that.
> 
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> ---
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 10 ++++++++--
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  4 ++++
>  2 files changed, 12 insertions(+), 2 deletions(-)

This seems fine, other than the misplaced hunk

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>

But it might be tidier like the below. There is already a function
that is called to build the sync and that has the q, so why not just
build the command directly there and avoid going through the ent?

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 268da20baa4e9c..fa9cb0f49bf1ee 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -338,18 +338,6 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
 		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
 		cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
 		break;
-	case CMDQ_OP_CMD_SYNC:
-		if (ent->sync.cs_none) {
-			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_NONE);
-		} else if (ent->sync.msiaddr) {
-			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
-			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
-		} else {
-			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
-		}
-		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
-		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
-		break;
 	default:
 		return -ENOENT;
 	}
@@ -367,25 +355,30 @@ static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu,
 }
 
 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
-					 struct arm_smmu_queue *q, u32 prod)
+					 struct arm_smmu_queue *q, u32 prod,
+					 bool msi)
 {
-	struct arm_smmu_cmdq_ent ent = {
-		.opcode = CMDQ_OP_CMD_SYNC,
-	};
+	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
+	cmd[0] = FIELD_PREP(CMDQ_0_OP, CMDQ_OP_CMD_SYNC) |
+		 FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH) |
+		 FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
+	if (q->quirks & CMDQ_QUIRK_SYNC_CS_NONE_ONLY) {
+		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_NONE);
+		return;
+	}
+
+	if (!msi || !(smmu->options & ARM_SMMU_OPT_MSIPOLL)) {
+		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
+		return;
+	}
 
 	/*
 	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
 	 * payload, so the write will zero the entire command on that platform.
 	 */
-	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
-		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
-				   q->ent_dwords * 8;
-	}
-
-	if (q->quirks & CMDQ_QUIRK_SYNC_CS_NONE_ONLY)
-		ent.sync.cs_none = true;
-
-	arm_smmu_cmdq_build_cmd(cmd, &ent);
+	cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
+	cmd[1] = (q->base_dma + Q_IDX(&q->llq, prod) * q->ent_dwords * 8) &
+		 CMDQ_SYNC_1_MSIADDR_MASK;
 }
 
 void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
@@ -402,9 +395,6 @@ void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
 	u64 cmd[CMDQ_ENT_DWORDS];
 	u32 cons = readl_relaxed(q->cons_reg);
 	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
-	struct arm_smmu_cmdq_ent cmd_sync = {
-		.opcode = CMDQ_OP_CMD_SYNC,
-	};
 
 	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
 		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
@@ -437,11 +427,8 @@ void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
 	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
 		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
 
-	if (q->quirks & CMDQ_QUIRK_SYNC_CS_NONE_ONLY)
-		cmd_sync.sync.cs_none = true;
-
 	/* Convert the erroneous command into a CMD_SYNC */
-	arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
+	arm_smmu_cmdq_build_sync_cmd(cmd, smmu, q, 0, false);
 
 	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
 }
@@ -812,7 +799,8 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
 	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
 	if (sync) {
 		prod = queue_inc_prod_n(&llq, n);
-		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
+		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod,
+					     true);
 		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
 
 		/*
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 9412fa4ff5e045..b1ce1986e61101 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -520,10 +520,6 @@ struct arm_smmu_cmdq_ent {
 		} resume;
 
 		#define CMDQ_OP_CMD_SYNC	0x46
-		struct {
-			u64			msiaddr;
-			bool			cs_none;
-		} sync;
 	};
 };
Nicolin Chen April 30, 2024, 4:30 p.m. UTC | #2
On Tue, Apr 30, 2024 at 11:22:01AM -0300, Jason Gunthorpe wrote:
> On Mon, Apr 29, 2024 at 09:43:45PM -0700, Nicolin Chen wrote:
> > The CMDQV extension in NVIDIA Tegra241 SoC only supports CS_NONE in the
> > CS field of CMD_SYNC. Add a quirk flag to accommodate that.
> > 
> > Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> > ---
> >  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 10 ++++++++--
> >  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  4 ++++
> >  2 files changed, 12 insertions(+), 2 deletions(-)
> 
> This seems fine, other than the misplaced hunk
> 
> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
> 
> But it might be tidier like the below. There is already a function
> that is called to build the sync that has the q, just build it
> directly there and avoid going through the ent?

Yea, and it looks like we can have a patch tidying the existing
sync-building function, and then another one adding CS_NONE.

>  static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
> -					 struct arm_smmu_queue *q, u32 prod)
> +					 struct arm_smmu_queue *q, u32 prod,
> +					 bool msi)
>  {
...
> +	if (!msi || !(smmu->options & ARM_SMMU_OPT_MSIPOLL)) {
> +		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
> +		return;
> +	}
...
> @@ -402,9 +395,6 @@ void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
>  	u64 cmd[CMDQ_ENT_DWORDS];
>  	u32 cons = readl_relaxed(q->cons_reg);
>  	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
> -	struct arm_smmu_cmdq_ent cmd_sync = {
> -		.opcode = CMDQ_OP_CMD_SYNC,
> -	};
>  
>  	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
>  		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
> @@ -437,11 +427,8 @@ void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
>  	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
>  		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
>  
> -	if (q->quirks & CMDQ_QUIRK_SYNC_CS_NONE_ONLY)
> -		cmd_sync.sync.cs_none = true;
> -
>  	/* Convert the erroneous command into a CMD_SYNC */
> -	arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
> +	arm_smmu_cmdq_build_sync_cmd(cmd, smmu, q, 0, false);
>  
>  	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);

Here is the only caller for "msi=false". Maybe we could just do:
+	arm_smmu_cmdq_build_sync_cmd(cmd, smmu, q, cons);

So, no need for "bool msi"? It would slightly change the behavior
though; a SYNC for ARM_SMMU_OPT_MSIPOLL should still be a SYNC.

Thanks
Nicolin
Jason Gunthorpe April 30, 2024, 4:37 p.m. UTC | #3
On Tue, Apr 30, 2024 at 09:30:43AM -0700, Nicolin Chen wrote:
> >  	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
> 
> Here is the only caller for "msi=false". Maybe we could just do:
> +	arm_smmu_cmdq_build_sync_cmd(cmd, smmu, q, cons);
> 
> So, no need of "bool msi"? It would slightly change the behavior
> though, a SYNC for ARM_SMMU_OPT_MSIPOLL should be still a SYNC.

I don't know, I didn't try to figure out what to stick for prod in
that case. It is probably OK to convert an error entry into an MSI
sync if it works out?

Jason
Nicolin Chen April 30, 2024, 4:43 p.m. UTC | #4
On Tue, Apr 30, 2024 at 01:37:33PM -0300, Jason Gunthorpe wrote:
> On Tue, Apr 30, 2024 at 09:30:43AM -0700, Nicolin Chen wrote:
> > >  	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
> > 
> > Here is the only caller for "msi=false". Maybe we could just do:
> > +	arm_smmu_cmdq_build_sync_cmd(cmd, smmu, q, cons);
> > 
> > So, no need of "bool msi"? It would slightly change the behavior
> > though, a SYNC for ARM_SMMU_OPT_MSIPOLL should be still a SYNC.
> 
> I don't know, I didn't try to figure out what to stick for prod in
> that case. It is probably OK to convert an error entry into a MSI
> sync if it works out?

Yea. I think it should be okay. I will try a hack with an illegal
command to test it out.

Nicolin
diff mbox series

Patch

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 6a7e6b1ba5f7..b3d03ca01adc 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -334,7 +334,9 @@  static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
 		cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
 		break;
 	case CMDQ_OP_CMD_SYNC:
-		if (ent->sync.msiaddr) {
+		if (ent->sync.cs_none) {
+			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_NONE);
+		} else if (ent->sync.msiaddr) {
 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
 			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
 		} else {
@@ -371,6 +373,9 @@  static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
 				   q->ent_dwords * 8;
 	}
 
+	if (q->quirks & CMDQ_QUIRK_SYNC_CS_NONE_ONLY)
+		ent.sync.cs_none = true;
+
 	arm_smmu_cmdq_build_cmd(cmd, &ent);
 }
 
@@ -708,7 +713,8 @@  static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
 					 struct arm_smmu_cmdq *cmdq,
 					 struct arm_smmu_ll_queue *llq)
 {
-	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
+	if (smmu->options & ARM_SMMU_OPT_MSIPOLL &&
+	    !(cmdq->q.quirks & CMDQ_QUIRK_SYNC_CS_NONE_ONLY))
 		return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq);
 
 	return __arm_smmu_cmdq_poll_until_consumed(smmu, cmdq, llq);
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 2a19bb63e5c6..bbee08e82943 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -510,6 +510,7 @@  struct arm_smmu_cmdq_ent {
 		#define CMDQ_OP_CMD_SYNC	0x46
 		struct {
 			u64			msiaddr;
+			bool			cs_none;
 		} sync;
 	};
 };
@@ -542,6 +543,9 @@  struct arm_smmu_queue {
 
 	u32 __iomem			*prod_reg;
 	u32 __iomem			*cons_reg;
+
+#define CMDQ_QUIRK_SYNC_CS_NONE_ONLY	BIT(0)	/* CMD_SYNC CS field supports CS_NONE only */
+	u32				quirks;
 };
 
 struct arm_smmu_queue_poll {