Message ID | 1560373737-21649-8-git-send-email-arbab@linux.ibm.com |
---|---|
State | Superseded |
Headers | show |
Series | Support the updated NPU in POWER9P; npu3 | expand |
Context | Check | Description |
---|---|---|
snowpatch_ozlabs/apply_patch | success | Successfully applied on branch master (db3929ee4f0a98596938f05da2789686908ebfd4) |
snowpatch_ozlabs/snowpatch_job_snowpatch-skiboot | fail | Test snowpatch/job/snowpatch-skiboot on branch master |
snowpatch_ozlabs/snowpatch_job_snowpatch-skiboot-dco | success | Signed-off-by present |
On Wed, 2019-06-12 at 16:08 -0500, Reza Arbab wrote: > Signed-off-by: Reza Arbab <arbab@linux.ibm.com> > --- > platforms/astbmc/Makefile.inc | 2 +- > platforms/astbmc/swift.c | 141 > ++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 142 insertions(+), 1 deletion(-) > create mode 100644 platforms/astbmc/swift.c > > diff --git a/platforms/astbmc/Makefile.inc b/platforms/astbmc/Makefile.inc > index b95ef542b47f..bc2ff2ce640d 100644 > --- a/platforms/astbmc/Makefile.inc > +++ b/platforms/astbmc/Makefile.inc > @@ -5,7 +5,7 @@ ASTBMC_OBJS = pnor.o common.o slots.o \ > p8dtu.o p8dnu.o \ > garrison.o barreleye.o \ > witherspoon.o zaius.o romulus.o p9dsu.o vesnin.o \ > - talos.o > + talos.o swift.o > > ASTBMC = $(PLATDIR)/astbmc/built-in.a > $(ASTBMC): $(ASTBMC_OBJS:%=$(PLATDIR)/astbmc/%) > diff --git a/platforms/astbmc/swift.c b/platforms/astbmc/swift.c > new file mode 100644 > index 000000000000..821f3a7257df > --- /dev/null > +++ b/platforms/astbmc/swift.c > @@ -0,0 +1,141 @@ > +/* Copyright 2019 IBM Corp. > + * > + * Licensed under the Apache License, Version 2.0 (the "License"); > + * you may not use this file except in compliance with the License. > + * You may obtain a copy of the License at > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or > + * implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. 
> + */ > + > +#include <skiboot.h> > +#include <ipmi.h> > +#include <npu3.h> > +#include "astbmc.h" > + > +static void swift_npu3_device_detect(struct npu3 *npu) > +{ > + struct npu3_dev *dev; > + uint32_t node, gpu_index; > + char slot[6]; > + > + node = P9_GCID2NODEID(npu->chip_id); > + > + switch (npu->index) { > + case 0: > + gpu_index = node * 2 + 1; > + break; > + case 2: > + gpu_index = node * 2; > + break; > + default: > + return; > + } > + > + snprintf(slot, sizeof(slot), "GPU%d", gpu_index); > + > + npu3_for_each_dev(dev, npu) { > + dev->type = NPU3_DEV_TYPE_NVLINK; > + dt_add_property_string(dev->dn, "ibm,slot-label", slot); > + dt_add_property_u64(dev->dn, "ibm,link-speed", 25000000000ull); > + dt_add_property_cells(dev->dn, "nvidia,link-speed", 9); > + } > +} > + > +#define DN(g) devs[g]->nvlink.gpu->dn > +#define G(g) (devs[g] ? devs[g]->nvlink.gpu->dn->phandle : 0) > +#define N(g) (devs[g] ? devs[g]->npu->nvlink.phb.dt_node->phandle : 0) > + > +#define add_peers_prop(g, p...) > \ > + if (devs[g]) \ > + dt_add_property_cells(DN(g), "ibm,nvlink-peers", ##p) > + > +/* Add GPU interconnect properties to the dt */ > +static void swift_npu3_fixup(void) > +{ > + struct npu3 *npu; > + struct npu3_dev *dev; > + struct npu3_dev *devs[4] = {}; > + uint32_t index; > + > + if (nr_chips() != 2 || npu3_chip_possible_gpus() != 2) { > + prlog(PR_ERR, "NPU: Unknown link topology detected\n"); > + return; > + } Anyway we can support 1 chip? Especially early on we have systems populated with just 1 chip and it's nice to still be able to work there. I guess we still boot, we just don't get NPUs. 
> + > + /* Collect the first link we find for each GPU */ > + npu3_for_each_nvlink_npu(npu) { > + npu3_for_each_nvlink_dev(dev, npu) { > + index = npu3_dev_gpu_index(dev); > + if (index == -1 || index >= ARRAY_SIZE(devs)) > + continue; > + > + if (dev->nvlink.gpu && !devs[index]) > + devs[index] = dev; > + } > + } > + > + add_peers_prop(0, G(3), G(3), > + G(2), G(2), G(2), > + G(1), G(1), G(1), > + N(0), N(0), N(0), N(0)); > + > + add_peers_prop(1, G(2), > + G(3), G(3), G(3), > + G(0), G(0), G(0), > + G(2), > + N(1), N(1), N(1), N(1)); > + > + add_peers_prop(2, G(1), > + G(3), G(3), G(3), > + G(0), G(0), G(0), > + G(1), > + N(2), N(2), N(2), N(2)); > + > + add_peers_prop(3, G(2), G(2), G(2), > + G(1), G(1), G(1), > + G(0), G(0), > + N(3), N(3), N(3), N(3)); > +} > + > +static void swift_exit(void) > +{ > + swift_npu3_fixup(); > + astbmc_exit(); > +} > + > +static bool swift_probe(void) > +{ > + if (!dt_node_is_compatible(dt_root, "ibm,swift")) > + return false; > + > + /* Lot of common early inits here */ > + astbmc_early_init(); > + > + /* Setup UART for use by OPAL (Linux hvc) */ > + uart_set_console_policy(UART_CONSOLE_OPAL); > + > + return true; > +} > + > +DECLARE_PLATFORM(swift) = { > + .bmc = &bmc_plat_ast2500_openbmc, > + .cec_power_down = astbmc_ipmi_power_down, > + .cec_reboot = astbmc_ipmi_reboot, > + .elog_commit = ipmi_elog_commit, > + .exit = swift_exit, > + .init = astbmc_init, > + .name = "Swift", > + .npu3_device_detect = swift_npu3_device_detect, > + .pci_get_slot_info = dt_slot_get_slot_info, > + .probe = swift_probe, > + .resource_loaded = flash_resource_loaded, > + .start_preload_resource = flash_start_preload_resource, > + .terminate = ipmi_terminate, > +};
On Thu, Jun 13, 2019 at 10:59:22AM +1000, Michael Neuling wrote:
>On Wed, 2019-06-12 at 16:08 -0500, Reza Arbab wrote:
>> +	if (nr_chips() != 2 || npu3_chip_possible_gpus() != 2) {
>> +		prlog(PR_ERR, "NPU: Unknown link topology detected\n");
>> +		return;
>> +	}
>
>Anyway we can support 1 chip? Especially early on we have systems populated
>with just 1 chip and it's nice to still be able to work there.
>
>I guess we still boot, we just don't get NPUs.

This was just sort of a clunky sanity check that we're really on a four-GPU-capable system. It didn't occur to me that a Swift might only have one chip. Checking that we're a two-GPU-per-chip system is good enough, so I'll just drop the nr_chips() part.
On 13/06/2019 07:08, Reza Arbab wrote: > Signed-off-by: Reza Arbab <arbab@linux.ibm.com> > --- > platforms/astbmc/Makefile.inc | 2 +- > platforms/astbmc/swift.c | 141 ++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 142 insertions(+), 1 deletion(-) > create mode 100644 platforms/astbmc/swift.c > > diff --git a/platforms/astbmc/Makefile.inc b/platforms/astbmc/Makefile.inc > index b95ef542b47f..bc2ff2ce640d 100644 > --- a/platforms/astbmc/Makefile.inc > +++ b/platforms/astbmc/Makefile.inc > @@ -5,7 +5,7 @@ ASTBMC_OBJS = pnor.o common.o slots.o \ > p8dtu.o p8dnu.o \ > garrison.o barreleye.o \ > witherspoon.o zaius.o romulus.o p9dsu.o vesnin.o \ > - talos.o > + talos.o swift.o > > ASTBMC = $(PLATDIR)/astbmc/built-in.a > $(ASTBMC): $(ASTBMC_OBJS:%=$(PLATDIR)/astbmc/%) > diff --git a/platforms/astbmc/swift.c b/platforms/astbmc/swift.c > new file mode 100644 > index 000000000000..821f3a7257df > --- /dev/null > +++ b/platforms/astbmc/swift.c > @@ -0,0 +1,141 @@ > +/* Copyright 2019 IBM Corp. > + * > + * Licensed under the Apache License, Version 2.0 (the "License"); > + * you may not use this file except in compliance with the License. > + * You may obtain a copy of the License at > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or > + * implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. 
> + */ > + > +#include <skiboot.h> > +#include <ipmi.h> > +#include <npu3.h> > +#include "astbmc.h" > + > +static void swift_npu3_device_detect(struct npu3 *npu) > +{ > + struct npu3_dev *dev; > + uint32_t node, gpu_index; > + char slot[6]; > + > + node = P9_GCID2NODEID(npu->chip_id); > + > + switch (npu->index) { > + case 0: > + gpu_index = node * 2 + 1; > + break; > + case 2: > + gpu_index = node * 2; > + break; > + default: > + return; > + } > + > + snprintf(slot, sizeof(slot), "GPU%d", gpu_index); > + > + npu3_for_each_dev(dev, npu) { > + dev->type = NPU3_DEV_TYPE_NVLINK; > + dt_add_property_string(dev->dn, "ibm,slot-label", slot); > + dt_add_property_u64(dev->dn, "ibm,link-speed", 25000000000ull); > + dt_add_property_cells(dev->dn, "nvidia,link-speed", 9); > + } > +} > + > +#define DN(g) devs[g]->nvlink.gpu->dn > +#define G(g) (devs[g] ? devs[g]->nvlink.gpu->dn->phandle : 0) > +#define N(g) (devs[g] ? devs[g]->npu->nvlink.phb.dt_node->phandle : 0) > + > +#define add_peers_prop(g, p...) \ > + if (devs[g]) \ > + dt_add_property_cells(DN(g), "ibm,nvlink-peers", ##p) > + > +/* Add GPU interconnect properties to the dt */ > +static void swift_npu3_fixup(void) > +{ > + struct npu3 *npu; > + struct npu3_dev *dev; > + struct npu3_dev *devs[4] = {}; So there are 4 interconnected GPUs per cluster now? Please define this 4 as a macro. And... > + uint32_t index; > + > + if (nr_chips() != 2 || npu3_chip_possible_gpus() != 2) { > + prlog(PR_ERR, "NPU: Unknown link topology detected\n"); > + return; > + } > + > + /* Collect the first link we find for each GPU */ > + npu3_for_each_nvlink_npu(npu) { > + npu3_for_each_nvlink_dev(dev, npu) { > + index = npu3_dev_gpu_index(dev); > + if (index == -1 || index >= ARRAY_SIZE(devs)) > + continue; > + > + if (dev->nvlink.gpu && !devs[index]) > + devs[index] = dev; > + } > + } > + > + add_peers_prop(0, G(3), G(3), > + G(2), G(2), G(2), > + G(1), G(1), G(1), > + N(0), N(0), N(0), N(0)); ... also 12 links per a GPU? Just curiosity. 
Thanks, > + > + add_peers_prop(1, G(2), > + G(3), G(3), G(3), > + G(0), G(0), G(0), > + G(2), > + N(1), N(1), N(1), N(1)); > + > + add_peers_prop(2, G(1), > + G(3), G(3), G(3), > + G(0), G(0), G(0), > + G(1), > + N(2), N(2), N(2), N(2)); > + > + add_peers_prop(3, G(2), G(2), G(2), > + G(1), G(1), G(1), > + G(0), G(0), > + N(3), N(3), N(3), N(3)); > +} > + > +static void swift_exit(void) > +{ > + swift_npu3_fixup(); > + astbmc_exit(); > +} > + > +static bool swift_probe(void) > +{ > + if (!dt_node_is_compatible(dt_root, "ibm,swift")) > + return false; > + > + /* Lot of common early inits here */ > + astbmc_early_init(); > + > + /* Setup UART for use by OPAL (Linux hvc) */ > + uart_set_console_policy(UART_CONSOLE_OPAL); > + > + return true; > +} > + > +DECLARE_PLATFORM(swift) = { > + .bmc = &bmc_plat_ast2500_openbmc, > + .cec_power_down = astbmc_ipmi_power_down, > + .cec_reboot = astbmc_ipmi_reboot, > + .elog_commit = ipmi_elog_commit, > + .exit = swift_exit, > + .init = astbmc_init, > + .name = "Swift", > + .npu3_device_detect = swift_npu3_device_detect, > + .pci_get_slot_info = dt_slot_get_slot_info, > + .probe = swift_probe, > + .resource_loaded = flash_resource_loaded, > + .start_preload_resource = flash_start_preload_resource, > + .terminate = ipmi_terminate, > +}; >
On Wed, Jun 26, 2019 at 04:38:10PM +1000, Alexey Kardashevskiy wrote:
>On 13/06/2019 07:08, Reza Arbab wrote:
>> +/* Add GPU interconnect properties to the dt */
>> +static void swift_npu3_fixup(void)
>> +{
>> +	struct npu3 *npu;
>> +	struct npu3_dev *dev;
>> +	struct npu3_dev *devs[4] = {};
>
>So there are 4 interconnected GPUs per cluster now? Please define this 4
>as a macro. And...

Right. All four GPUs are interconnected now. This is different from Witherspoon, where GPUs are only interconnected to other GPUs from the same chip. I'll use a macro to make it more clear what the 4 represents.

>> +	uint32_t index;
>> +
>> +	if (nr_chips() != 2 || npu3_chip_possible_gpus() != 2) {
>> +		prlog(PR_ERR, "NPU: Unknown link topology detected\n");
>> +		return;
>> +	}
>> +
>> +	/* Collect the first link we find for each GPU */
>> +	npu3_for_each_nvlink_npu(npu) {
>> +		npu3_for_each_nvlink_dev(dev, npu) {
>> +			index = npu3_dev_gpu_index(dev);
>> +			if (index == -1 || index >= ARRAY_SIZE(devs))
>> +				continue;
>> +
>> +			if (dev->nvlink.gpu && !devs[index])
>> +				devs[index] = dev;
>> +		}
>> +	}
>> +
>> +	add_peers_prop(0, G(3), G(3),
>> +			  G(2), G(2), G(2),
>> +			  G(1), G(1), G(1),
>> +			  N(0), N(0), N(0), N(0));
>
>
>... also 12 links per a GPU? Just curiosity. Thanks,

Yes. Each NPU has 12 links, and each GPU has 12 links.

>> +
>> +	add_peers_prop(1, G(2),
>> +			  G(3), G(3), G(3),
>> +			  G(0), G(0), G(0),
>> +			  G(2),
>> +			  N(1), N(1), N(1), N(1));
>> +
>> +	add_peers_prop(2, G(1),
>> +			  G(3), G(3), G(3),
>> +			  G(0), G(0), G(0),
>> +			  G(1),
>> +			  N(2), N(2), N(2), N(2));
>> +
>> +	add_peers_prop(3, G(2), G(2), G(2),
>> +			  G(1), G(1), G(1),
>> +			  G(0), G(0),
>> +			  N(3), N(3), N(3), N(3));
>> +}
diff --git a/platforms/astbmc/Makefile.inc b/platforms/astbmc/Makefile.inc index b95ef542b47f..bc2ff2ce640d 100644 --- a/platforms/astbmc/Makefile.inc +++ b/platforms/astbmc/Makefile.inc @@ -5,7 +5,7 @@ ASTBMC_OBJS = pnor.o common.o slots.o \ p8dtu.o p8dnu.o \ garrison.o barreleye.o \ witherspoon.o zaius.o romulus.o p9dsu.o vesnin.o \ - talos.o + talos.o swift.o ASTBMC = $(PLATDIR)/astbmc/built-in.a $(ASTBMC): $(ASTBMC_OBJS:%=$(PLATDIR)/astbmc/%) diff --git a/platforms/astbmc/swift.c b/platforms/astbmc/swift.c new file mode 100644 index 000000000000..821f3a7257df --- /dev/null +++ b/platforms/astbmc/swift.c @@ -0,0 +1,141 @@ +/* Copyright 2019 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <skiboot.h> +#include <ipmi.h> +#include <npu3.h> +#include "astbmc.h" + +static void swift_npu3_device_detect(struct npu3 *npu) +{ + struct npu3_dev *dev; + uint32_t node, gpu_index; + char slot[6]; + + node = P9_GCID2NODEID(npu->chip_id); + + switch (npu->index) { + case 0: + gpu_index = node * 2 + 1; + break; + case 2: + gpu_index = node * 2; + break; + default: + return; + } + + snprintf(slot, sizeof(slot), "GPU%d", gpu_index); + + npu3_for_each_dev(dev, npu) { + dev->type = NPU3_DEV_TYPE_NVLINK; + dt_add_property_string(dev->dn, "ibm,slot-label", slot); + dt_add_property_u64(dev->dn, "ibm,link-speed", 25000000000ull); + dt_add_property_cells(dev->dn, "nvidia,link-speed", 9); + } +} + +#define DN(g) devs[g]->nvlink.gpu->dn +#define G(g) (devs[g] ? devs[g]->nvlink.gpu->dn->phandle : 0) +#define N(g) (devs[g] ? devs[g]->npu->nvlink.phb.dt_node->phandle : 0) + +#define add_peers_prop(g, p...) \ + if (devs[g]) \ + dt_add_property_cells(DN(g), "ibm,nvlink-peers", ##p) + +/* Add GPU interconnect properties to the dt */ +static void swift_npu3_fixup(void) +{ + struct npu3 *npu; + struct npu3_dev *dev; + struct npu3_dev *devs[4] = {}; + uint32_t index; + + if (nr_chips() != 2 || npu3_chip_possible_gpus() != 2) { + prlog(PR_ERR, "NPU: Unknown link topology detected\n"); + return; + } + + /* Collect the first link we find for each GPU */ + npu3_for_each_nvlink_npu(npu) { + npu3_for_each_nvlink_dev(dev, npu) { + index = npu3_dev_gpu_index(dev); + if (index == -1 || index >= ARRAY_SIZE(devs)) + continue; + + if (dev->nvlink.gpu && !devs[index]) + devs[index] = dev; + } + } + + add_peers_prop(0, G(3), G(3), + G(2), G(2), G(2), + G(1), G(1), G(1), + N(0), N(0), N(0), N(0)); + + add_peers_prop(1, G(2), + G(3), G(3), G(3), + G(0), G(0), G(0), + G(2), + N(1), N(1), N(1), N(1)); + + add_peers_prop(2, G(1), + G(3), G(3), G(3), + G(0), G(0), G(0), + G(1), + N(2), N(2), N(2), N(2)); + + add_peers_prop(3, G(2), G(2), G(2), + G(1), G(1), G(1), + G(0), G(0), + N(3), 
N(3), N(3), N(3)); +} + +static void swift_exit(void) +{ + swift_npu3_fixup(); + astbmc_exit(); +} + +static bool swift_probe(void) +{ + if (!dt_node_is_compatible(dt_root, "ibm,swift")) + return false; + + /* Lot of common early inits here */ + astbmc_early_init(); + + /* Setup UART for use by OPAL (Linux hvc) */ + uart_set_console_policy(UART_CONSOLE_OPAL); + + return true; +} + +DECLARE_PLATFORM(swift) = { + .bmc = &bmc_plat_ast2500_openbmc, + .cec_power_down = astbmc_ipmi_power_down, + .cec_reboot = astbmc_ipmi_reboot, + .elog_commit = ipmi_elog_commit, + .exit = swift_exit, + .init = astbmc_init, + .name = "Swift", + .npu3_device_detect = swift_npu3_device_detect, + .pci_get_slot_info = dt_slot_get_slot_info, + .probe = swift_probe, + .resource_loaded = flash_resource_loaded, + .start_preload_resource = flash_start_preload_resource, + .terminate = ipmi_terminate, +};
Signed-off-by: Reza Arbab <arbab@linux.ibm.com> --- platforms/astbmc/Makefile.inc | 2 +- platforms/astbmc/swift.c | 141 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 142 insertions(+), 1 deletion(-) create mode 100644 platforms/astbmc/swift.c