@@ -44,10 +44,10 @@ CAPP-PSL transactions.
Notes:
-----
-* If PHB is in PEC2 then requesting mode `OPAL_PHB_CAPI_MODE_DMA_TVT1` will
- allocate extra 16/8 dma read engines to the PHB depending on its stack
- (stack 0/ stack 1). This is needed to improve the Direct-GPU DMA read
- performance for the Mellanox CX5 card.
+* On a Witherspoon system, if the PHB is in PEC2, then requesting mode
+ `OPAL_PHB_CAPI_MODE_DMA_TVT1` will allocate extra 16/8 DMA read engines to the
+ PHB depending on its stack (stack 0/stack 1). This is needed to improve the
+ Direct-GPU DMA read performance for the Mellanox CX5 card.
* Mode `OPAL_PHB_CAPI_MODE_PCIE` not yet supported on Power-9.
* Requesting mode `OPAL_PHB_CAPI_MODE_CAPI` on Power-9 will disable fast-reboot.
* Modes `OPAL_PHB_CAPI_MODE_DMA`, `OPAL_PHB_CAPI_MODE_SNOOP_OFF` are
@@ -148,6 +148,9 @@ static void phb4_init_hw(struct phb4 *p);
#define PHB4_CAN_STORE_EOI(p) \
(XIVE_STORE_EOI_ENABLED && ((p)->rev >= PHB4_REV_NIMBUS_DD20))
+/* Are we running on a Witherspoon system */
+#define IS_WITHERSPOON() (strcmp(platform.name, "Witherspoon") == 0)
+
static bool verbose_eeh;
static bool pci_tracing;
static bool pci_eeh_mmio;
@@ -3937,24 +3940,29 @@ static void phb4_init_capp_regs(struct phb4 *p, uint32_t capp_eng)
0xDCE0280428000000);
}
- /* capp owns PHB read buffers */
- if (p->index == CAPP0_PHB_INDEX) {
+
+ /* Assign the CAPP-owned PHB read buffers */
+ reg = 0;
+ if (capp_eng & CAPP_MAX_DMA_READ_ENGINES) {
+ /* In case of a Mellanox CX5 card on Witherspoon, assign
+ * just 4 PHB read buffers to CAPP. On other systems, allocate
+ * 8 PHB read buffers.
+ */
+ reg = IS_WITHERSPOON() ? 0xF000000000000000 : /*4 Read buffers*/
+ 0xFF00000000000000; /*8 PHB Read buffers*/
+
+ } else if (p->index == CAPP0_PHB_INDEX) {
/* max PHB read buffers 0-47 */
reg = 0xFFFFFFFFFFFF0000;
- if (capp_eng & CAPP_MAX_DMA_READ_ENGINES)
- reg = 0xF000000000000000;
- xscom_write(p->chip_id, APC_FSM_READ_MASK + offset, reg);
- xscom_write(p->chip_id, XPT_FSM_RMM + offset, reg);
- }
- if (p->index == CAPP1_PHB_INDEX) {
+
+ } else if (p->index == CAPP1_PHB_INDEX) {
/* Set 30 Read machines for CAPP Minus 20-27 for DMA */
reg = 0xFFFFF00E00000000;
- if (capp_eng & CAPP_MAX_DMA_READ_ENGINES)
- reg = 0xF000000000000000;
- xscom_write(p->chip_id, APC_FSM_READ_MASK + offset, reg);
- xscom_write(p->chip_id, XPT_FSM_RMM + offset, reg);
}
+ xscom_write(p->chip_id, APC_FSM_READ_MASK + offset, reg);
+ xscom_write(p->chip_id, XPT_FSM_RMM + offset, reg);
+
/* CAPP FIR Action 0 */
xscom_write(p->chip_id, CAPP_FIR_ACTION0 + offset, 0x0b1c000104060000);
@@ -4111,8 +4119,13 @@ static int64_t enable_capi_mode(struct phb4 *p, uint64_t pe_number,
/* CAPP Control Register. Enable CAPP Mode */
reg = 0x8000000000000000ULL; /* PEC works in CAPP Mode */
reg |= stq_eng;
- if (capp_eng & CAPP_MAX_DMA_READ_ENGINES)
- dma_eng = 0x0000F00000000000ULL; /* 4 CAPP Read machines */
+ if (capp_eng & CAPP_MAX_DMA_READ_ENGINES) {
+ /* For a Mellanox CX5 running on Witherspoon, allocate 4 CAPP read
+ * machines. On other systems, allocate 8 CAPP read machines.
+ */
+ dma_eng = IS_WITHERSPOON() ? 0x0000F00000000000ULL :
+ 0x0000FF0000000000ULL;
+ }
reg |= dma_eng;
xscom_write(p->chip_id, p->pe_xscom + XPEC_NEST_CAPP_CNTL, reg);
@@ -4120,9 +4133,11 @@ static int64_t enable_capi_mode(struct phb4 *p, uint64_t pe_number,
* x8+x8 (bifurcated) or x8+x4+x4 (trifurcated) mode. When
* Mellanox CX5 card is attached to stack0 of this PEC, indicated by
* request to allocate CAPP_MAX_DMA_READ_ENGINES; we tweak the default
- * dma-read engines allocations to maximize the DMA read performance
+ * dma-read engine allocations to maximize the DMA read performance.
+ * Do this only on a Witherspoon system.
*/
- if ((p->index == CAPP1_PHB_INDEX) &&
+ if (IS_WITHERSPOON() &&
+ (p->index == CAPP1_PHB_INDEX) &&
(capp_eng & CAPP_MAX_DMA_READ_ENGINES)) {
/*
Patch 5690c5a8980f("phb4: Reallocate PEC2 DMA-Read engines to improve GPU-Direct bandwidth") introduced allocation of extra DMA-read engines for improving Mellanox CX5 GPU-Direct bandwidth. At present, CX5 is the only card that's using these optimizations, so these changes will only impact Witherspoon systems. However, the hardware team has raised the possibility of other non-Witherspoon systems in the future that may be using a similar card, where these optimizations won't be needed. So they have asked us to make these changes Witherspoon-specific. Hence this patch updates phb4_init_capp_regs() & enable_capi_mode() to configure the extra DMA-read engine allocation if and only if skiboot is running on a Witherspoon platform. Cc: stable #6.0.6+ Fixes: 5690c5a8980f("phb4: Reallocate PEC2 DMA-Read engines to improve GPU-Direct bandwidth") Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com> --- Change-log: Resend -> Updated the request for merge to stable from 5.0.6+ to 6.0.6+ --- .../opal-pci-set-phb-capi-mode-93.rst | 8 ++-- hw/phb4.c | 47 ++++++++++++------- 2 files changed, 35 insertions(+), 20 deletions(-)