Message ID | 20230810211149.428158-1-grimm@linux.ibm.com |
---|---|
State | Superseded |
Headers | show |
Series | hw/imc: Detect BML and fix core counters | expand |
On 8/11/23 2:41 AM, Ryan Grimm wrote: > On systems running BML we started noticing this in the skiboot log: > > [ 409.088819302,3] XSCOM: write error gcid=0x0 pcb_addr=0x20000060 stat=0x4 > [ 409.088823446,3] ELOG: Error getting buffer to log error > [ 409.088824806,3] XSCOM: Write failed, ret = -26 > [ 409.088825797,3] IMC: error in xscom_write for pdbar > [ 0.468976][ T19] core_imc memory allocation for cpu 0 failed > [ 0.468993][ T1] IMC PMU core_imc Register failed > > I tracked down that bad pcb_addr to this line in the code: > > pdbar_addr = get_imc_scom_addr_for_quad(phys_core_id, > pdbar_scom_index[port_id]); > > I found that pdbar_scom_index was not initialized because, like mambo, we don't > have the IMC catalog in memory. So, in imc_init we error out and never > initialize it in setup_imc_scoms. > > This patch adds a chip quirk QUIRK_BML because it seems like a reasonable thing > to do and it's easy to put a BML {}; in the device tree like Mambo, Awan, etc. > > It is tested on a Rainier and errors are gone and /sys/devices/core_imc shows > up as expected. > > Signed-off-by: Ryan Grimm <grimm@linux.ibm.com> > --- > core/chip.c | 4 ++++ > hw/imc.c | 10 +++++----- > include/chip.h | 1 + > 3 files changed, 10 insertions(+), 5 deletions(-) > > diff --git a/core/chip.c b/core/chip.c > index cee65822..0e96e625 100644 > --- a/core/chip.c > +++ b/core/chip.c > @@ -190,6 +190,10 @@ void init_chips(void) > proc_chip_quirks |= QUIRK_QEMU | QUIRK_NO_DIRECT_CTL | QUIRK_NO_RNG; > prlog(PR_NOTICE, "CHIP: Detected QEMU simulator\n"); > } > + if (dt_find_by_path(dt_root, "/bml")) { > + proc_chip_quirks |= QUIRK_BML; > + prlog(PR_NOTICE, "CHIP: Detected BML\n"); > + } > > /* We walk the chips based on xscom nodes in the tree */ > dt_for_each_compatible(dt_root, xn, "ibm,xscom") { > diff --git a/hw/imc.c b/hw/imc.c > index 97e0809f..dc3a59ae 100644 > --- a/hw/imc.c > +++ b/hw/imc.c > @@ -475,7 +475,7 @@ void imc_catalog_preload(void) > int ret = OPAL_SUCCESS; > compress_buf_size = MAX_COMPRESSED_IMC_DTB_SIZE; > > - if (proc_chip_quirks & QUIRK_MAMBO_CALLOUTS) > + if (proc_chip_quirks & (QUIRK_MAMBO_CALLOUTS | QUIRK_BML)) > return; > > /* Enable only for power 9/10 */ > @@ -613,13 +613,13 @@ void imc_init(void) > struct dt_node *dev; > int err_flag = -1; > > - if (proc_chip_quirks & QUIRK_MAMBO_CALLOUTS) { > + if ((proc_chip_quirks & (QUIRK_MAMBO_CALLOUTS) || QUIRK_BML)) { Guess idea is to check for MAMBO or BML bits set in proc_chip_quirks. But then why Close Parentheses after QUIRK_MAMBO_CALLOUTS? Maddy > dev = dt_find_compatible_node(dt_root, NULL, > "ibm,opal-in-memory-counters"); > if (!dev) > return; > > - goto imc_mambo; > + goto imc_mambo_bml; > } > > /* Enable only for power 9/10 */ > @@ -662,7 +662,7 @@ void imc_init(void) > goto err; > } > > -imc_mambo: > +imc_mambo_bml: > if (setup_imc_scoms()) { > prerror("IMC: Failed to setup the scoms\n"); > goto err; > @@ -683,7 +683,7 @@ imc_mambo: > /* Update the base_addr and chip-id for nest nodes */ > imc_dt_update_nest_node(dev); > > - if (proc_chip_quirks & QUIRK_MAMBO_CALLOUTS) > + if (proc_chip_quirks & (QUIRK_MAMBO_CALLOUTS | QUIRK_BML)) > return; > > /* > diff --git a/include/chip.h b/include/chip.h > index cfa5ce35..c90b8a7f 100644 > --- a/include/chip.h > +++ b/include/chip.h > @@ -187,6 +187,7 @@ enum proc_chip_quirks { > QUIRK_NO_RNG = 0x00000100, > QUIRK_QEMU = 0x00000200, > QUIRK_AWAN = 0x00000400, > + QUIRK_BML = 0x00000800, > }; > > extern enum proc_chip_quirks proc_chip_quirks;
On Mon, 2023-08-14 at 09:28 +0530, Madhavan Srinivasan wrote: > On 8/11/23 2:41 AM, Ryan Grimm wrote: > > > > diff --git a/hw/imc.c b/hw/imc.c > > index 97e0809f..dc3a59ae 100644 > > --- a/hw/imc.c > > +++ b/hw/imc.c > > @@ -475,7 +475,7 @@ void imc_catalog_preload(void) > > int ret = OPAL_SUCCESS; > > compress_buf_size = MAX_COMPRESSED_IMC_DTB_SIZE; > > > > - if (proc_chip_quirks & QUIRK_MAMBO_CALLOUTS) > > + if (proc_chip_quirks & (QUIRK_MAMBO_CALLOUTS | QUIRK_BML)) > > return; > > > > /* Enable only for power 9/10 */ > > @@ -613,13 +613,13 @@ void imc_init(void) > > struct dt_node *dev; > > int err_flag = -1; > > > > - if (proc_chip_quirks & QUIRK_MAMBO_CALLOUTS) { > > + if ((proc_chip_quirks & (QUIRK_MAMBO_CALLOUTS) || QUIRK_BML)) { > > Guess idea is to check for MAMBO or BML bits set in proc_chip_quirks. > But then why Close Parentheses after QUIRK_MAMBO_CALLOUTS? > Oh, oops, unintentional extra parenthesis, will fix, thanks! Ryan > Maddy > > > dev = dt_find_compatible_node(dt_root, NULL, > > "ibm,opal-in-memory-counters"); > > if (!dev) > > return; > > > > - goto imc_mambo; > > + goto imc_mambo_bml; > > } > > > > /* Enable only for power 9/10 */ > > @@ -662,7 +662,7 @@ void imc_init(void) > > goto err; > > } > > > > -imc_mambo: > > +imc_mambo_bml: > > if (setup_imc_scoms()) { > > prerror("IMC: Failed to setup the scoms\n"); > > goto err; > > @@ -683,7 +683,7 @@ imc_mambo: > > /* Update the base_addr and chip-id for nest nodes */ > > imc_dt_update_nest_node(dev); > > > > - if (proc_chip_quirks & QUIRK_MAMBO_CALLOUTS) > > + if (proc_chip_quirks & (QUIRK_MAMBO_CALLOUTS | QUIRK_BML)) > > return; > > > > /* > > diff --git a/include/chip.h b/include/chip.h > > index cfa5ce35..c90b8a7f 100644 > > --- a/include/chip.h > > +++ b/include/chip.h > > @@ -187,6 +187,7 @@ enum proc_chip_quirks { > > QUIRK_NO_RNG = 0x00000100, > > QUIRK_QEMU = 0x00000200, > > QUIRK_AWAN = 0x00000400, > > + QUIRK_BML = 0x00000800, > > }; > > > > extern enum proc_chip_quirks proc_chip_quirks;
diff --git a/core/chip.c b/core/chip.c index cee65822..0e96e625 100644 --- a/core/chip.c +++ b/core/chip.c @@ -190,6 +190,10 @@ void init_chips(void) proc_chip_quirks |= QUIRK_QEMU | QUIRK_NO_DIRECT_CTL | QUIRK_NO_RNG; prlog(PR_NOTICE, "CHIP: Detected QEMU simulator\n"); } + if (dt_find_by_path(dt_root, "/bml")) { + proc_chip_quirks |= QUIRK_BML; + prlog(PR_NOTICE, "CHIP: Detected BML\n"); + } /* We walk the chips based on xscom nodes in the tree */ dt_for_each_compatible(dt_root, xn, "ibm,xscom") { diff --git a/hw/imc.c b/hw/imc.c index 97e0809f..dc3a59ae 100644 --- a/hw/imc.c +++ b/hw/imc.c @@ -475,7 +475,7 @@ void imc_catalog_preload(void) int ret = OPAL_SUCCESS; compress_buf_size = MAX_COMPRESSED_IMC_DTB_SIZE; - if (proc_chip_quirks & QUIRK_MAMBO_CALLOUTS) + if (proc_chip_quirks & (QUIRK_MAMBO_CALLOUTS | QUIRK_BML)) return; /* Enable only for power 9/10 */ @@ -613,13 +613,13 @@ void imc_init(void) struct dt_node *dev; int err_flag = -1; - if (proc_chip_quirks & QUIRK_MAMBO_CALLOUTS) { + if ((proc_chip_quirks & (QUIRK_MAMBO_CALLOUTS) || QUIRK_BML)) { dev = dt_find_compatible_node(dt_root, NULL, "ibm,opal-in-memory-counters"); if (!dev) return; - goto imc_mambo; + goto imc_mambo_bml; } /* Enable only for power 9/10 */ @@ -662,7 +662,7 @@ void imc_init(void) goto err; } -imc_mambo: +imc_mambo_bml: if (setup_imc_scoms()) { prerror("IMC: Failed to setup the scoms\n"); goto err; @@ -683,7 +683,7 @@ imc_mambo: /* Update the base_addr and chip-id for nest nodes */ imc_dt_update_nest_node(dev); - if (proc_chip_quirks & QUIRK_MAMBO_CALLOUTS) + if (proc_chip_quirks & (QUIRK_MAMBO_CALLOUTS | QUIRK_BML)) return; /* diff --git a/include/chip.h b/include/chip.h index cfa5ce35..c90b8a7f 100644 --- a/include/chip.h +++ b/include/chip.h @@ -187,6 +187,7 @@ enum proc_chip_quirks { QUIRK_NO_RNG = 0x00000100, QUIRK_QEMU = 0x00000200, QUIRK_AWAN = 0x00000400, + QUIRK_BML = 0x00000800, }; extern enum proc_chip_quirks proc_chip_quirks;
On systems running BML we started noticing this in the skiboot log: [ 409.088819302,3] XSCOM: write error gcid=0x0 pcb_addr=0x20000060 stat=0x4 [ 409.088823446,3] ELOG: Error getting buffer to log error [ 409.088824806,3] XSCOM: Write failed, ret = -26 [ 409.088825797,3] IMC: error in xscom_write for pdbar [ 0.468976][ T19] core_imc memory allocation for cpu 0 failed [ 0.468993][ T1] IMC PMU core_imc Register failed I tracked down that bad pcb_addr to this line in the code: pdbar_addr = get_imc_scom_addr_for_quad(phys_core_id, pdbar_scom_index[port_id]); I found that pdbar_scom_index was not initialized because, like mambo, we don't have the IMC catalog in memory. So, in imc_init we error out and never initialize it in setup_imc_scoms. This patch adds a chip quirk QUIRK_BML because it seems like a reasonable thing to do and it's easy to put a BML {}; in the device tree like Mambo, Awan, etc. It is tested on a Rainier and errors are gone and /sys/devices/core_imc shows up as expected. Signed-off-by: Ryan Grimm <grimm@linux.ibm.com> --- core/chip.c | 4 ++++ hw/imc.c | 10 +++++----- include/chip.h | 1 + 3 files changed, 10 insertions(+), 5 deletions(-)