diff mbox series

[3/6] hw/npu2: Common NPU2 init routine between NVLink and OpenCAPI

Message ID 8581104cbc1827507ca09a341ec2e7a1b8b61872.1534495426.git-series.andrew.donnellan@au1.ibm.com
State Superseded
Headers show
Series OpenCAPI support for Witherspoon | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success master/apply_patch Successfully applied

Commit Message

Andrew Donnellan Aug. 17, 2018, 8:44 a.m. UTC
Replace probe_npu2() and probe_npu2_opencapi() with a new shared
probe_npu2(). Refactor some of the common NPU setup code into shared code.

No functional change. This patch does not implement support for using both
types of devices simultaneously on the same NPU - we expect to add this
sometime in the future.

Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
---
 core/init.c        |   2 +-
 hw/npu2-common.c   | 120 +++++++++++++++++++++++-
 hw/npu2-opencapi.c | 169 +++++++++++----------------------
 hw/npu2.c          | 236 ++++++++++++++--------------------------------
 include/npu2.h     |   6 +-
 include/skiboot.h  |   1 +-
 6 files changed, 259 insertions(+), 275 deletions(-)

Comments

Reza Arbab Aug. 20, 2018, 4:43 p.m. UTC | #1
On Fri, Aug 17, 2018 at 06:44:38PM +1000, Andrew Donnellan wrote:
>Replace probe_npu2() and probe_npu2_opencapi() with a new shared
>probe_npu2(). Refactor some of the common NPU setup code into shared code.
>
>No functional change. This patch does not implement support for using both
>types of devices simultaneously on the same NPU - we expect to add this
>sometime in the future.

Verified that with this patchset, everything still seems to work fine 
(on a 4-GPU Redbud, at least).

[snip]
>@@ -1403,64 +1372,56 @@ static void npu2_probe_phb(struct dt_node *dn)
> 	 *
> 	 * Obviously if the year is now 2020 that didn't happen and you
> 	 * should fix this :-) */
>-	xscom_write_mask(gcid, 0x5011000, PPC_BIT(58), PPC_BIT(58));
>-	xscom_write_mask(gcid, 0x5011030, PPC_BIT(58), PPC_BIT(58));
>-	xscom_write_mask(gcid, 0x5011060, PPC_BIT(58), PPC_BIT(58));
>-	xscom_write_mask(gcid, 0x5011090, PPC_BIT(58), PPC_BIT(58));
>-	xscom_write_mask(gcid, 0x5011200, PPC_BIT(58), PPC_BIT(58));
>-	xscom_write_mask(gcid, 0x5011230, PPC_BIT(58), PPC_BIT(58));
>-	xscom_write_mask(gcid, 0x5011260, PPC_BIT(58), PPC_BIT(58));
>-	xscom_write_mask(gcid, 0x5011290, PPC_BIT(58), PPC_BIT(58));
>-	xscom_write_mask(gcid, 0x5011400, PPC_BIT(58), PPC_BIT(58));
>-	xscom_write_mask(gcid, 0x5011430, PPC_BIT(58), PPC_BIT(58));
>-	xscom_write_mask(gcid, 0x5011460, PPC_BIT(58), PPC_BIT(58));
>-	xscom_write_mask(gcid, 0x5011490, PPC_BIT(58), PPC_BIT(58));
>-
>-	xscom_write_mask(gcid, 0x50110c0, PPC_BIT(53), PPC_BIT(53));
>-	xscom_write_mask(gcid, 0x50112c0, PPC_BIT(53), PPC_BIT(53));
>-	xscom_write_mask(gcid, 0x50114c0, PPC_BIT(53), PPC_BIT(53));
>-	xscom_write_mask(gcid, 0x50110f1, PPC_BIT(41), PPC_BIT(41));
>-	xscom_write_mask(gcid, 0x50112f1, PPC_BIT(41), PPC_BIT(41));
>-	xscom_write_mask(gcid, 0x50114f1, PPC_BIT(41), PPC_BIT(41));
>+	xscom_write_mask(npu->chip_id, 0x5011000, PPC_BIT(58), PPC_BIT(58));
>+	xscom_write_mask(npu->chip_id, 0x5011030, PPC_BIT(58), PPC_BIT(58));
>+	xscom_write_mask(npu->chip_id, 0x5011060, PPC_BIT(58), PPC_BIT(58));
>+	xscom_write_mask(npu->chip_id, 0x5011090, PPC_BIT(58), PPC_BIT(58));
>+	xscom_write_mask(npu->chip_id, 0x5011200, PPC_BIT(58), PPC_BIT(58));
>+	xscom_write_mask(npu->chip_id, 0x5011230, PPC_BIT(58), PPC_BIT(58));
>+	xscom_write_mask(npu->chip_id, 0x5011260, PPC_BIT(58), PPC_BIT(58));
>+	xscom_write_mask(npu->chip_id, 0x5011290, PPC_BIT(58), PPC_BIT(58));
>+	xscom_write_mask(npu->chip_id, 0x5011400, PPC_BIT(58), PPC_BIT(58));
>+	xscom_write_mask(npu->chip_id, 0x5011430, PPC_BIT(58), PPC_BIT(58));
>+	xscom_write_mask(npu->chip_id, 0x5011460, PPC_BIT(58), PPC_BIT(58));
>+	xscom_write_mask(npu->chip_id, 0x5011490, PPC_BIT(58), PPC_BIT(58));
>+
>+	xscom_write_mask(npu->chip_id, 0x50110c0, PPC_BIT(53), PPC_BIT(53));
>+	xscom_write_mask(npu->chip_id, 0x50112c0, PPC_BIT(53), PPC_BIT(53));
>+	xscom_write_mask(npu->chip_id, 0x50114c0, PPC_BIT(53), PPC_BIT(53));
>+	xscom_write_mask(npu->chip_id, 0x50110f1, PPC_BIT(41), PPC_BIT(41));
>+	xscom_write_mask(npu->chip_id, 0x50112f1, PPC_BIT(41), PPC_BIT(41));
>+	xscom_write_mask(npu->chip_id, 0x50114f1, PPC_BIT(41), PPC_BIT(41));
>
> 	val = NPU2_NTL_MISC_CFG2_BRICK_ENABLE |
> 	      NPU2_NTL_MISC_CFG2_NDL_TX_PARITY_ENA |
> 	      NPU2_NTL_MISC_CFG2_NDL_PRI_PARITY_ENA |
> 	      NPU2_NTL_MISC_CFG2_RCV_CREDIT_OVERFLOW_ENA;
>-	xscom_write_mask(gcid, 0x5011110, val, val);
>-	xscom_write_mask(gcid, 0x5011130, val, val);
>-	xscom_write_mask(gcid, 0x5011310, val, val);
>-	xscom_write_mask(gcid, 0x5011330, val, val);
>-	xscom_write_mask(gcid, 0x5011510, val, val);
>-	xscom_write_mask(gcid, 0x5011530, val, val);
>+	xscom_write_mask(npu->chip_id, 0x5011110, val, val);
>+	xscom_write_mask(npu->chip_id, 0x5011130, val, val);
>+	xscom_write_mask(npu->chip_id, 0x5011310, val, val);
>+	xscom_write_mask(npu->chip_id, 0x5011330, val, val);
>+	xscom_write_mask(npu->chip_id, 0x5011510, val, val);
>+	xscom_write_mask(npu->chip_id, 0x5011530, val, val);
>
> 	val = PPC_BIT(6) | PPC_BIT(7) | PPC_BIT(11);
>-	xscom_write_mask(gcid, 0x5011009, val, PPC_BITMASK(6,11));
>-	xscom_write_mask(gcid, 0x5011039, val, PPC_BITMASK(6,11));
>-	xscom_write_mask(gcid, 0x5011069, val, PPC_BITMASK(6,11));
>-	xscom_write_mask(gcid, 0x5011099, val, PPC_BITMASK(6,11));
>-	xscom_write_mask(gcid, 0x5011209, val, PPC_BITMASK(6,11));
>-	xscom_write_mask(gcid, 0x5011239, val, PPC_BITMASK(6,11));
>-	xscom_write_mask(gcid, 0x5011269, val, PPC_BITMASK(6,11));
>-	xscom_write_mask(gcid, 0x5011299, val, PPC_BITMASK(6,11));
>-	xscom_write_mask(gcid, 0x5011409, val, PPC_BITMASK(6,11));
>-	xscom_write_mask(gcid, 0x5011439, val, PPC_BITMASK(6,11));
>-	xscom_write_mask(gcid, 0x5011469, val, PPC_BITMASK(6,11));
>-	xscom_write_mask(gcid, 0x5011499, val, PPC_BITMASK(6,11));
>-
>-	index = dt_prop_get_u32(dn, "ibm,npu-index");
>-	phb_index = dt_prop_get_u32(dn, "ibm,phb-index");
>-	links = dt_prop_get_u32(dn, "ibm,npu-links");
>-	prlog(PR_INFO, "NPU: Chip %d Found NPU2#%d (%d links) at %s\n",
>-	      gcid, index, links, path);
>-	free(path);
>-
>-	/* Retrieve scom base address */
>-	scom = dt_get_address(dn, 0, NULL);
>-	prlog(PR_INFO, "   SCOM Base:  %08x\n", scom);
>+	xscom_write_mask(npu->chip_id, 0x5011009, val, PPC_BITMASK(6,11));
>+	xscom_write_mask(npu->chip_id, 0x5011039, val, PPC_BITMASK(6,11));
>+	xscom_write_mask(npu->chip_id, 0x5011069, val, PPC_BITMASK(6,11));
>+	xscom_write_mask(npu->chip_id, 0x5011099, val, PPC_BITMASK(6,11));
>+	xscom_write_mask(npu->chip_id, 0x5011209, val, PPC_BITMASK(6,11));
>+	xscom_write_mask(npu->chip_id, 0x5011239, val, PPC_BITMASK(6,11));
>+	xscom_write_mask(npu->chip_id, 0x5011269, val, PPC_BITMASK(6,11));
>+	xscom_write_mask(npu->chip_id, 0x5011299, val, PPC_BITMASK(6,11));
>+	xscom_write_mask(npu->chip_id, 0x5011409, val, PPC_BITMASK(6,11));
>+	xscom_write_mask(npu->chip_id, 0x5011439, val, PPC_BITMASK(6,11));
>+	xscom_write_mask(npu->chip_id, 0x5011469, val, PPC_BITMASK(6,11));
>+	xscom_write_mask(npu->chip_id, 0x5011499, val, PPC_BITMASK(6,11));
>
> 	/* Reassign the BARs */
>-	assign_mmio_bars(gcid, scom, reg, mm_win);
>+	assign_mmio_bars(npu->chip_id, npu->xscom_base, reg, mm_win);
>+	npu->regs = (void *)reg[0];
>+	npu->mm_base = mm_win[0];
>+	npu->mm_size = mm_win[1];
>
> 	if (reg[0] && reg[1])
> 		prlog(PR_INFO, "   Global MMIO BAR:  %016llx (%lldMB)\n",

Once this settles, I can do a follow-up patch that creates a 
setup_device() in npu2.c like you have in npu2-opencapi.c, collapsing 
all these ugly hardcoded scom writes into a loop of per-device writes 
with properly named values.

Acked-by: Reza Arbab <arbab@linux.ibm.com>
Andrew Donnellan Aug. 21, 2018, 1:18 a.m. UTC | #2
On 21/08/18 02:43, Reza Arbab wrote:
> Verified that with this patchset, everything still seems to work fine 
> (on a 4-GPU Redbud, at least).

Thanks for testing!

> Once this settles, I can do a follow-up patch that creates a 
> setup_device() in npu2.c like you have in npu2-opencapi.c, collapsing 
> all these ugly hardcoded scom writes into a loop of per-device writes 
> with properly named values.

I've already got some followup patches in the works that are going to 
touch a lot of the mux stuff, so perhaps after that...
Alistair Popple Aug. 21, 2018, 6:34 a.m. UTC | #3
Couple of minor comments below, but nothing major.

Reviewed-By: Alistair Popple <alistair@popple.id.au>

<snip>

> +void probe_npu2(void)
> +{
> +	struct proc_chip *chip = next_chip(NULL);
> +	struct npu2 *npu;
> +	struct dt_node *np;
> +	const char *zcal;
> +
> +	/* Abort if we're running on DD1 */
> +	if (chip &&
> +	    (chip->type == PROC_CHIP_P9_NIMBUS ||
> +	     chip->type == PROC_CHIP_P9_CUMULUS) &&
> +	    (chip->ec_level & 0xf0) == 0x10) {
> +		prlog(PR_INFO, "NPU2: DD1 not supported\n");
> +		return;
> +	}
> +
> +	/* Check for a zcal override */
> +	zcal = nvram_query("nv_zcal_override");
> +	if (zcal) {
> +		nv_zcal_nominal = atoi(zcal);
> +		prlog(PR_WARNING, "NPU2: Using ZCAL impedance override = %d\n", nv_zcal_nominal);
> +	}
> +
> +	dt_for_each_compatible(dt_root, np, "ibm,power9-npu") {
> +	        npu = setup_npu(np);
> +		if (npu) {

Not sure if you're just being defensive but setup_npu() can never return
npu == NULL (it asserts instead) so the check is somewhat superfluous.

> +			setup_devices(npu);
> +		}
> +	}
> +}

<snip>

>  /*
> diff --git a/include/npu2.h b/include/npu2.h
> index 10742031ec0f..9fdc438bb4c1 100644
> --- a/include/npu2.h
> +++ b/include/npu2.h
> @@ -147,6 +147,7 @@ struct npu2 {
>  	uint32_t	chip_id;
>  	uint64_t	xscom_base;
>  	void		*regs;
> +	void		*ocapi_global_mmio_base;

This is only one value so at this point it doesn't matter but if we start adding
more values to struct npu2 that are exclusive to NPU2 xor OCAPI it would be nice
to have these in different structs so it's obvious which ones are
initialised/used in each case.

>  	uint64_t	mm_base;
>  	uint64_t	mm_size;
>  	uint32_t	base_lsi;
> @@ -167,6 +168,7 @@ struct npu2 {
>  
>  	/* NVLink */
>  	struct phb	phb_nvlink;
> +	uint32_t	phb_index;
>  };
Andrew Donnellan Aug. 21, 2018, 7:01 a.m. UTC | #4
On 21/08/18 16:34, Alistair Popple wrote:
> Couple of minor comments below, but nothing major.
> 
> Reviewed-By: Alistair Popple <alistair@popple.id.au>

Thanks :)

>> +	dt_for_each_compatible(dt_root, np, "ibm,power9-npu") {
>> +	        npu = setup_npu(np);
>> +		if (npu) {
> 
> Not sure if you're just being defensive but setup_npu() can never return
> npu == NULL (it asserts instead) so the check is somewhat superfluous.

I think at some point it was able to return NULL and then I shuffled 
things around a bit...

>> diff --git a/include/npu2.h b/include/npu2.h
>> index 10742031ec0f..9fdc438bb4c1 100644
>> --- a/include/npu2.h
>> +++ b/include/npu2.h
>> @@ -147,6 +147,7 @@ struct npu2 {
>>   	uint32_t	chip_id;
>>   	uint64_t	xscom_base;
>>   	void		*regs;
>> +	void		*ocapi_global_mmio_base;
> 
> This is only one value so at this point it doesn't matter but if we start adding
> more values to struct npu2 that are exclusive to NPU2 xor OCAPI it would be nice
> to have these in different structs so it's obvious which ones are
> initialised/used in each case.

I've gone back and forth on this... in principle I agree but C doesn't 
make nested struct namespacing look particularly elegant and I keep 
screwing up . vs -> when I do that... but yes, if we need to add more 
fields then maybe it's worth doing that.
diff mbox series

Patch

diff --git a/core/init.c b/core/init.c
index ca6c468c3941..9095981e67e0 100644
--- a/core/init.c
+++ b/core/init.c
@@ -1126,8 +1126,6 @@  void __noreturn __nomcount main_cpu_entry(const void *fdt)
 	/* Probe NPUs */
 	probe_npu();
 	probe_npu2();
-	/* TODO: Eventually, we'll do NVLink and OpenCAPI together */
-	probe_npu2_opencapi();
 
 	/* Initialize PCI */
 	pci_init_slots();
diff --git a/hw/npu2-common.c b/hw/npu2-common.c
index d076b4906fcc..8adab7d22094 100644
--- a/hw/npu2-common.c
+++ b/hw/npu2-common.c
@@ -20,6 +20,7 @@ 
 #include <npu2.h>
 #include <npu2-regs.h>
 #include <bitutils.h>
+#include <nvram.h>
 
 enum npu2_dev_type npu2_dt_link_dev_type(struct dt_node *link)
 {
@@ -107,3 +108,122 @@  void npu2_write_mask_4b(struct npu2 *p, uint64_t reg, uint32_t val, uint32_t mas
 	npu2_scom_write(p->chip_id, p->xscom_base, reg, NPU2_MISC_DA_LEN_4B,
 			(uint64_t)new_val << 32);
 }
+
+static struct npu2 *setup_npu(struct dt_node *dn)
+{
+	struct npu2 *npu;
+	struct npu2_dev *dev;
+	struct dt_node *np;
+	uint32_t num_links;
+	void *npumem;
+	char *path;
+	int gcid;
+	struct proc_chip *chip;
+	int i = 0;
+
+	/* Retrieve chip ID */
+	path = dt_get_path(dn);
+	gcid = dt_get_chip_id(dn);
+	chip = get_chip(gcid);
+	assert(chip);
+
+	num_links = dt_prop_get_u32(dn, "ibm,npu-links");
+	npumem = zalloc(sizeof(struct npu2) + num_links *
+			sizeof(struct npu2_dev));
+	assert(npumem);
+	npu = npumem;
+
+	npu->dt_node = dn;
+	npu->index = dt_prop_get_u32(dn, "ibm,npu-index");
+	npu->chip_id = gcid;
+	npu->xscom_base = dt_get_address(dn, 0, NULL);
+	npu->phb_index = dt_prop_get_u32(dn, "ibm,phb-index");
+	npu->devices = npumem + sizeof(struct npu2);
+
+	dt_for_each_compatible(dn, np, "ibm,npu-link") {
+		dev = &npu->devices[i];
+		dev->link_index = dt_prop_get_u32(np, "ibm,npu-link-index");
+		/* May be overridden by platform presence detection */
+		dev->brick_index = dev->link_index;
+		dev->type = npu2_dt_link_dev_type(np);
+		dev->npu = npu;
+		dev->dt_node = np;
+		dev->pl_xscom_base = dt_prop_get_u64(np, "ibm,npu-phy");
+		dev->lane_mask = dt_prop_get_u32(np, "ibm,npu-lane-mask");
+		dev->link_speed = dt_prop_get_u64(np, "ibm,link-speed");
+		i++;
+	};
+	npu->total_devices = i;
+
+	prlog(PR_INFO, "NPU: Chip %d Found NPU2#%d (%d links) at %s\n",
+	      npu->chip_id, npu->index, npu->total_devices, path);
+	prlog(PR_INFO, "   SCOM Base:  %08llx\n", npu->xscom_base);
+	free(path);
+	return npu;
+}
+
+static void setup_devices(struct npu2 *npu)
+{
+	bool nvlink_detected = false, ocapi_detected = false;
+	struct npu2_dev *dev;
+
+	/*
+	 * TODO: In future, we'll do brick configuration here to support mixed
+	 * setups.
+	 */
+	for (int i = 0; i < npu->total_devices; i++) {
+		dev = &npu->devices[i];
+		switch (dev->type) {
+		case NPU2_DEV_TYPE_NVLINK:
+			nvlink_detected = true;
+			break;
+		case NPU2_DEV_TYPE_OPENCAPI:
+			ocapi_detected = true;
+			break;
+		default:
+			prlog(PR_INFO, "NPU: Link %d device not present\n",
+			      npu->devices[i].link_index);
+		}
+	}
+
+	if (nvlink_detected && ocapi_detected) {
+		prlog(PR_ERR, "NPU: NVLink and OpenCAPI devices on same chip not supported, aborting NPU init\n");
+		return;
+	}
+
+	if (nvlink_detected)
+		npu2_nvlink_init_npu(npu);
+	else if (ocapi_detected)
+		npu2_opencapi_init_npu(npu);
+}
+
+void probe_npu2(void)
+{
+	struct proc_chip *chip = next_chip(NULL);
+	struct npu2 *npu;
+	struct dt_node *np;
+	const char *zcal;
+
+	/* Abort if we're running on DD1 */
+	if (chip &&
+	    (chip->type == PROC_CHIP_P9_NIMBUS ||
+	     chip->type == PROC_CHIP_P9_CUMULUS) &&
+	    (chip->ec_level & 0xf0) == 0x10) {
+		prlog(PR_INFO, "NPU2: DD1 not supported\n");
+		return;
+	}
+
+	/* Check for a zcal override */
+	zcal = nvram_query("nv_zcal_override");
+	if (zcal) {
+		nv_zcal_nominal = atoi(zcal);
+		prlog(PR_WARNING, "NPU2: Using ZCAL impedance override = %d\n", nv_zcal_nominal);
+	}
+
+	dt_for_each_compatible(dt_root, np, "ibm,power9-npu") {
+	        npu = setup_npu(np);
+		if (npu) {
+			setup_devices(npu);
+		}
+	}
+}
diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
index 68ae93a2f836..ead8ec011341 100644
--- a/hw/npu2-opencapi.c
+++ b/hw/npu2-opencapi.c
@@ -1446,11 +1446,14 @@  static int npu2_add_mmio_regs(struct phb *phb, struct pci_device *pd,
 	 * Add the addresses of the registers needed by the OS to handle
 	 * faults. The OS accesses them by mmio.
 	 */
-	dsisr  = (uint64_t) dev->npu->regs + NPU2_OTL_OSL_DSISR(stacku, block);
-	dar    = (uint64_t) dev->npu->regs + NPU2_OTL_OSL_DAR(stacku, block);
-	tfc    = (uint64_t) dev->npu->regs + NPU2_OTL_OSL_TFC(stacku, block);
-	handle = (uint64_t) dev->npu->regs + NPU2_OTL_OSL_PEHANDLE(stacku,
-								block);
+	dsisr  = (uint64_t) dev->npu->ocapi_global_mmio_base +
+		NPU2_OTL_OSL_DSISR(stacku, block);
+	dar    = (uint64_t) dev->npu->ocapi_global_mmio_base +
+		NPU2_OTL_OSL_DAR(stacku, block);
+	tfc    = (uint64_t) dev->npu->ocapi_global_mmio_base +
+		NPU2_OTL_OSL_TFC(stacku, block);
+	handle = (uint64_t) dev->npu->ocapi_global_mmio_base +
+		NPU2_OTL_OSL_PEHANDLE(stacku, block);
 	dt_add_property_cells(pd->dn, "ibm,opal-xsl-irq", irq);
 	dt_add_property_cells(pd->dn, "ibm,opal-xsl-mmio",
 			hi32(dsisr), lo32(dsisr),
@@ -1576,20 +1579,15 @@  static void setup_debug_training_state(struct npu2_dev *dev)
 	}
 }
 
-static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n,
-				       struct npu2_dev *dev)
+static void setup_device(struct npu2_dev *dev)
 {
-	uint32_t dev_index, npu_index;
 	struct dt_node *dn_phb, *dn;
 	struct pci_slot *slot;
 	char port_name[17];
 	uint64_t mm_win[2];
 
-	dev_index = dt_prop_get_u32(dn_link, "ibm,npu-link-index");
-	npu_index = dt_prop_get_u32(n->dt_node, "ibm,npu-index");
-
 	/* Populate PHB device node */
-	phys_map_get(n->chip_id, NPU_OCAPI_MMIO, dev_index, &mm_win[0],
+	phys_map_get(dev->npu->chip_id, NPU_OCAPI_MMIO, dev->brick_index, &mm_win[0],
 		     &mm_win[1]);
 	prlog(PR_DEBUG, "OCAPI: Setting MMIO window to %016llx + %016llx\n",
 	      mm_win[0], mm_win[1]);
@@ -1609,40 +1607,28 @@  static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n,
 
 	dt_add_property_strings(dn_phb, "device_type", "pciex");
 	dt_add_property(dn_phb, "reg", mm_win, sizeof(mm_win));
-	dt_add_property_cells(dn_phb, "ibm,npu-index", npu_index);
-	dt_add_property_cells(dn_phb, "ibm,chip-id", n->chip_id);
-	dt_add_property_cells(dn_phb, "ibm,xscom-base", n->xscom_base);
-	dt_add_property_cells(dn_phb, "ibm,npcq", n->dt_node->phandle);
+	dt_add_property_cells(dn_phb, "ibm,npu-index", dev->npu->index);
+	dt_add_property_cells(dn_phb, "ibm,chip-id", dev->npu->chip_id);
+	dt_add_property_cells(dn_phb, "ibm,xscom-base", dev->npu->xscom_base);
+	dt_add_property_cells(dn_phb, "ibm,npcq", dev->npu->dt_node->phandle);
 	dt_add_property_cells(dn_phb, "ibm,links", 1);
 	dt_add_property(dn_phb, "ibm,mmio-window", mm_win, sizeof(mm_win));
 	dt_add_property_cells(dn_phb, "ibm,phb-diag-data-size", 0);
 	dt_add_property_cells(dn_phb, "ibm,opal-num-pes", NPU2_MAX_PE_NUM);
 
-	n->mm_base = mm_win[0];
-	n->mm_size = mm_win[1];
-
 	dt_add_property_cells(dn_phb, "ranges", 0x02000000,
-			      hi32(n->mm_base), lo32(n->mm_base),
-			      hi32(n->mm_base), lo32(n->mm_base),
-			      hi32(n->mm_size), lo32(n->mm_size));
+			      hi32(mm_win[0]), lo32(mm_win[0]),
+			      hi32(mm_win[0]), lo32(mm_win[0]),
+			      hi32(mm_win[1]), lo32(mm_win[1]));
 
-	dev->type = NPU2_DEV_TYPE_OPENCAPI;
-	dev->npu = n;
-	dev->dt_node = dn_link;
 	dev->phb_ocapi.dt_node = dn_phb;
 	dev->phb_ocapi.ops = &npu2_opencapi_ops;
 	dev->phb_ocapi.phb_type = phb_type_npu_v2_opencapi;
 	dev->phb_ocapi.scan_map = 0;
-	dev->link_index = dt_prop_get_u32(dn_link, "ibm,npu-link-index");
-	dev->brick_index = dev->link_index;
-	dev->pl_xscom_base = dt_prop_get_u64(dn_link, "ibm,npu-phy");
-	dev->lane_mask = dt_prop_get_u32(dn_link, "ibm,npu-lane-mask");
-	dev->link_speed = dt_prop_get_u64(dn_link, "ibm,link-speed");
+
 	dev->bdfn = 0;
 	dev->train_need_fence = false;
 	dev->train_fenced = false;
-	n->total_devices++;
-
 	/* Find I2C port for handling device reset */
 	snprintf(port_name, sizeof(port_name), "p8_%08x_e%dp%d",
 		 dev->npu->chip_id, platform.ocapi->i2c_engine,
@@ -1663,11 +1649,11 @@  static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n,
 
 	/* TODO: Procedure 13.1.3.7 - AFU Memory Range BARs */
 	/* Procedure 13.1.3.8 - AFU MMIO Range BARs */
-	setup_afu_mmio_bars(n->chip_id, n->xscom_base, dev);
+	setup_afu_mmio_bars(dev->npu->chip_id, dev->npu->xscom_base, dev);
 	/* Procedure 13.1.3.9 - AFU Config BARs */
-	setup_afu_config_bars(n->chip_id, n->xscom_base, dev);
+	setup_afu_config_bars(dev->npu->chip_id, dev->npu->xscom_base, dev);
 
-	set_fence_control(n->chip_id, n->xscom_base, dev->brick_index, 0b00);
+	set_fence_control(dev->npu->chip_id, dev->npu->xscom_base, dev->brick_index, 0b00);
 
 	if (npu2_ocapi_training_state != NPU2_TRAIN_DEFAULT) {
 		setup_debug_training_state(dev);
@@ -1689,107 +1675,72 @@  failed:
 	return;
 }
 
-static void npu2_opencapi_probe(struct dt_node *dn)
+static void read_nvram_training_state(void)
 {
-	struct dt_node *link;
-	char *path;
-	uint32_t gcid, index, links, scom_base;
-	uint64_t reg[2];
-	uint64_t dev_index;
-	struct npu2 *n;
-	int rc, i = 0;
-
-	gcid = dt_get_chip_id(dn);
-	index = dt_prop_get_u32(dn, "ibm,npu-index");
-	links = dt_prop_get_u32(dn, "ibm,npu-links");
-
-	/* Don't try to init when we have an NVLink link */
-	dt_for_each_compatible(dn, link, "ibm,npu-link") {
-		if (npu2_dt_link_dev_type(link) != NPU2_DEV_TYPE_OPENCAPI) {
-			prlog(PR_DEBUG,
-			      "OCAPI: NPU%d: Non-OpenCAPI link found, skipping OpenCAPI init\n",
-			      index);
-			return;
-		}
+	const char *state;
+
+	state = nvram_query("opencapi-link-training");
+	if (state) {
+		if (!strcmp(state, "prbs31"))
+			npu2_ocapi_training_state = NPU2_TRAIN_PRBS31;
+		else if (!strcmp(state, "none"))
+			npu2_ocapi_training_state = NPU2_TRAIN_NONE;
+		else
+			prlog(PR_WARNING,
+			      "OCAPI: invalid training state in NVRAM: %s\n",
+			      state);
 	}
+}
 
-	path = dt_get_path(dn);
-	prlog(PR_INFO, "OCAPI: Chip %d Found OpenCAPI NPU%d (%d links) at %s\n",
-	      gcid, index, links, path);
-	free(path);
+int npu2_opencapi_init_npu(struct npu2 *npu)
+{
+	struct npu2_dev *dev;
+	uint64_t reg[2];
+	int rc;
 
 	assert(platform.ocapi);
+	read_nvram_training_state();
 
 	/* TODO: Test OpenCAPI with fast reboot and make it work */
 	disable_fast_reboot("OpenCAPI device enabled");
 
-	scom_base = dt_get_address(dn, 0, NULL);
-	prlog(PR_INFO, "OCAPI:	 SCOM Base:  %08x\n", scom_base);
+	setup_global_mmio_bar(npu->chip_id, npu->xscom_base, reg);
 
-	setup_global_mmio_bar(gcid, scom_base, reg);
+	npu->ocapi_global_mmio_base = (void *)reg[0];
 
-	n = zalloc(sizeof(struct npu2) + links * sizeof(struct npu2_dev));
-	n->devices = (struct npu2_dev *)(n + 1);
-	n->chip_id = gcid;
-	n->xscom_base = scom_base;
-	n->regs = (void *)reg[0];
-	n->dt_node = dn;
+	for (int i = 0; i < npu->total_devices; i++) {
+		dev = &npu->devices[i];
+		if (dev->type != NPU2_DEV_TYPE_OPENCAPI)
+			continue;
 
-	dt_for_each_compatible(dn, link, "ibm,npu-link") {
-		dev_index = dt_prop_get_u32(link, "ibm,npu-link-index");
-		prlog(PR_INFO, "OCAPI: Configuring link index %lld\n",
-		      dev_index);
+		prlog(PR_INFO, "OCAPI: Configuring link index %d, brick %d\n",
+		      dev->link_index, dev->brick_index);
 
 		/* Procedure 13.1.3.1 - Select OCAPI vs NVLink */
-		brick_config(gcid, scom_base, dev_index);
+		brick_config(npu->chip_id, npu->xscom_base, dev->brick_index);
 
 		/* Procedure 13.1.3.5 - Transaction Layer Configuration */
-		tl_config(gcid, scom_base, dev_index);
+		tl_config(npu->chip_id, npu->xscom_base, dev->brick_index);
 
 		/* Procedure 13.1.3.6 - Address Translation Configuration */
-		address_translation_config(gcid, scom_base, dev_index);
+		address_translation_config(npu->chip_id, npu->xscom_base, dev->brick_index);
 	}
 
 	/* Procedure 13.1.3.10 - Interrupt Configuration */
-	rc = setup_irq(n);
+	rc = setup_irq(npu);
 	if (rc)
 		goto failed;
 
-	dt_for_each_compatible(dn, link, "ibm,npu-link") {
-		npu2_opencapi_setup_device(link, n, &n->devices[i]);
-		i++;
+	for (int i = 0; i < npu->total_devices; i++) {
+		dev = &npu->devices[i];
+		if (dev->type != NPU2_DEV_TYPE_OPENCAPI)
+			continue;
+		setup_device(dev);
 	}
 
-	return;
+	return 0;
 failed:
-	free(n);
-}
-
-static void read_nvram_training_state(void)
-{
-	const char *state;
-
-	state = nvram_query("opencapi-link-training");
-	if (state) {
-		if (!strcmp(state, "prbs31"))
-			npu2_ocapi_training_state = NPU2_TRAIN_PRBS31;
-		else if (!strcmp(state, "none"))
-			npu2_ocapi_training_state = NPU2_TRAIN_NONE;
-		else
-			prlog(PR_WARNING,
-			      "OCAPI: invalid training state in NVRAM: %s\n",
-			      state);
-	}
-}
-
-void probe_npu2_opencapi(void)
-{
-	struct dt_node *np_npu;
-
-	read_nvram_training_state();
-
-	dt_for_each_compatible(dt_root, np_npu, "ibm,power9-npu")
-		npu2_opencapi_probe(np_npu);
+	return -1;
 }
 
 static const struct phb_ops npu2_opencapi_ops = {
diff --git a/hw/npu2.c b/hw/npu2.c
index be1c304420fc..419dd37a24b1 100644
--- a/hw/npu2.c
+++ b/hw/npu2.c
@@ -1357,44 +1357,13 @@  static void assign_mmio_bars(uint64_t gcid, uint32_t scom, uint64_t reg[2], uint
 }
 
 /*
- * Probe NPU2 device node and create PCI root device node
- * accordingly. The NPU2 device node should specify number
- * of links and xscom base address to access links.
+ * Set up NPU for NVLink and create PCI root device node
+ * accordingly.
  */
-static void npu2_probe_phb(struct dt_node *dn)
+int npu2_nvlink_init_npu(struct npu2 *npu)
 {
-	struct proc_chip *proc_chip;
-	struct dt_node *np, *link;
-	bool ocapi_detected = false, nvlink_detected = false;
-	uint32_t gcid, scom, index, phb_index, links;
+	struct dt_node *np;
 	uint64_t reg[2], mm_win[2], val;
-	char *path;
-
-	/* Abort if any OpenCAPI links detected */
-	dt_for_each_compatible(dn, link, "ibm,npu-link") {
-		if (npu2_dt_link_dev_type(link) == NPU2_DEV_TYPE_OPENCAPI)
-			ocapi_detected = true;
-		else
-			nvlink_detected = true;
-	}
-
-	if (ocapi_detected && nvlink_detected) {
-		prlog(PR_ERR, "NPU: NVLink and OpenCAPI devices on same chip not supported\n");
-	        return;
-	} else if (ocapi_detected) {
-		prlog(PR_INFO, "NPU: OpenCAPI link configuration detected, not initialising NVLink\n");
-		return;
-	}
-
-	/* Retrieve chip id */
-	path = dt_get_path(dn);
-	gcid = dt_get_chip_id(dn);
-	proc_chip = get_chip(gcid);
-	assert(proc_chip);
-	if ((proc_chip->ec_level & 0xf0) > 0x20) {
-		prerror("NPU: unsupported ec level on Chip 0x%x!\n", gcid);
-		return;
-	}
 
 	/* TODO: Clean this up with register names, etc. when we get
 	 * time. This just turns NVLink mode on in each brick and should
@@ -1403,64 +1372,56 @@  static void npu2_probe_phb(struct dt_node *dn)
 	 *
 	 * Obviously if the year is now 2020 that didn't happen and you
 	 * should fix this :-) */
-	xscom_write_mask(gcid, 0x5011000, PPC_BIT(58), PPC_BIT(58));
-	xscom_write_mask(gcid, 0x5011030, PPC_BIT(58), PPC_BIT(58));
-	xscom_write_mask(gcid, 0x5011060, PPC_BIT(58), PPC_BIT(58));
-	xscom_write_mask(gcid, 0x5011090, PPC_BIT(58), PPC_BIT(58));
-	xscom_write_mask(gcid, 0x5011200, PPC_BIT(58), PPC_BIT(58));
-	xscom_write_mask(gcid, 0x5011230, PPC_BIT(58), PPC_BIT(58));
-	xscom_write_mask(gcid, 0x5011260, PPC_BIT(58), PPC_BIT(58));
-	xscom_write_mask(gcid, 0x5011290, PPC_BIT(58), PPC_BIT(58));
-	xscom_write_mask(gcid, 0x5011400, PPC_BIT(58), PPC_BIT(58));
-	xscom_write_mask(gcid, 0x5011430, PPC_BIT(58), PPC_BIT(58));
-	xscom_write_mask(gcid, 0x5011460, PPC_BIT(58), PPC_BIT(58));
-	xscom_write_mask(gcid, 0x5011490, PPC_BIT(58), PPC_BIT(58));
-
-	xscom_write_mask(gcid, 0x50110c0, PPC_BIT(53), PPC_BIT(53));
-	xscom_write_mask(gcid, 0x50112c0, PPC_BIT(53), PPC_BIT(53));
-	xscom_write_mask(gcid, 0x50114c0, PPC_BIT(53), PPC_BIT(53));
-	xscom_write_mask(gcid, 0x50110f1, PPC_BIT(41), PPC_BIT(41));
-	xscom_write_mask(gcid, 0x50112f1, PPC_BIT(41), PPC_BIT(41));
-	xscom_write_mask(gcid, 0x50114f1, PPC_BIT(41), PPC_BIT(41));
+	xscom_write_mask(npu->chip_id, 0x5011000, PPC_BIT(58), PPC_BIT(58));
+	xscom_write_mask(npu->chip_id, 0x5011030, PPC_BIT(58), PPC_BIT(58));
+	xscom_write_mask(npu->chip_id, 0x5011060, PPC_BIT(58), PPC_BIT(58));
+	xscom_write_mask(npu->chip_id, 0x5011090, PPC_BIT(58), PPC_BIT(58));
+	xscom_write_mask(npu->chip_id, 0x5011200, PPC_BIT(58), PPC_BIT(58));
+	xscom_write_mask(npu->chip_id, 0x5011230, PPC_BIT(58), PPC_BIT(58));
+	xscom_write_mask(npu->chip_id, 0x5011260, PPC_BIT(58), PPC_BIT(58));
+	xscom_write_mask(npu->chip_id, 0x5011290, PPC_BIT(58), PPC_BIT(58));
+	xscom_write_mask(npu->chip_id, 0x5011400, PPC_BIT(58), PPC_BIT(58));
+	xscom_write_mask(npu->chip_id, 0x5011430, PPC_BIT(58), PPC_BIT(58));
+	xscom_write_mask(npu->chip_id, 0x5011460, PPC_BIT(58), PPC_BIT(58));
+	xscom_write_mask(npu->chip_id, 0x5011490, PPC_BIT(58), PPC_BIT(58));
+
+	xscom_write_mask(npu->chip_id, 0x50110c0, PPC_BIT(53), PPC_BIT(53));
+	xscom_write_mask(npu->chip_id, 0x50112c0, PPC_BIT(53), PPC_BIT(53));
+	xscom_write_mask(npu->chip_id, 0x50114c0, PPC_BIT(53), PPC_BIT(53));
+	xscom_write_mask(npu->chip_id, 0x50110f1, PPC_BIT(41), PPC_BIT(41));
+	xscom_write_mask(npu->chip_id, 0x50112f1, PPC_BIT(41), PPC_BIT(41));
+	xscom_write_mask(npu->chip_id, 0x50114f1, PPC_BIT(41), PPC_BIT(41));
 
 	val = NPU2_NTL_MISC_CFG2_BRICK_ENABLE |
 	      NPU2_NTL_MISC_CFG2_NDL_TX_PARITY_ENA |
 	      NPU2_NTL_MISC_CFG2_NDL_PRI_PARITY_ENA |
 	      NPU2_NTL_MISC_CFG2_RCV_CREDIT_OVERFLOW_ENA;
-	xscom_write_mask(gcid, 0x5011110, val, val);
-	xscom_write_mask(gcid, 0x5011130, val, val);
-	xscom_write_mask(gcid, 0x5011310, val, val);
-	xscom_write_mask(gcid, 0x5011330, val, val);
-	xscom_write_mask(gcid, 0x5011510, val, val);
-	xscom_write_mask(gcid, 0x5011530, val, val);
+	xscom_write_mask(npu->chip_id, 0x5011110, val, val);
+	xscom_write_mask(npu->chip_id, 0x5011130, val, val);
+	xscom_write_mask(npu->chip_id, 0x5011310, val, val);
+	xscom_write_mask(npu->chip_id, 0x5011330, val, val);
+	xscom_write_mask(npu->chip_id, 0x5011510, val, val);
+	xscom_write_mask(npu->chip_id, 0x5011530, val, val);
 
 	val = PPC_BIT(6) | PPC_BIT(7) | PPC_BIT(11);
-	xscom_write_mask(gcid, 0x5011009, val, PPC_BITMASK(6,11));
-	xscom_write_mask(gcid, 0x5011039, val, PPC_BITMASK(6,11));
-	xscom_write_mask(gcid, 0x5011069, val, PPC_BITMASK(6,11));
-	xscom_write_mask(gcid, 0x5011099, val, PPC_BITMASK(6,11));
-	xscom_write_mask(gcid, 0x5011209, val, PPC_BITMASK(6,11));
-	xscom_write_mask(gcid, 0x5011239, val, PPC_BITMASK(6,11));
-	xscom_write_mask(gcid, 0x5011269, val, PPC_BITMASK(6,11));
-	xscom_write_mask(gcid, 0x5011299, val, PPC_BITMASK(6,11));
-	xscom_write_mask(gcid, 0x5011409, val, PPC_BITMASK(6,11));
-	xscom_write_mask(gcid, 0x5011439, val, PPC_BITMASK(6,11));
-	xscom_write_mask(gcid, 0x5011469, val, PPC_BITMASK(6,11));
-	xscom_write_mask(gcid, 0x5011499, val, PPC_BITMASK(6,11));
-
-	index = dt_prop_get_u32(dn, "ibm,npu-index");
-	phb_index = dt_prop_get_u32(dn, "ibm,phb-index");
-	links = dt_prop_get_u32(dn, "ibm,npu-links");
-	prlog(PR_INFO, "NPU: Chip %d Found NPU2#%d (%d links) at %s\n",
-	      gcid, index, links, path);
-	free(path);
-
-	/* Retrieve scom base address */
-	scom = dt_get_address(dn, 0, NULL);
-	prlog(PR_INFO, "   SCOM Base:  %08x\n", scom);
+	xscom_write_mask(npu->chip_id, 0x5011009, val, PPC_BITMASK(6,11));
+	xscom_write_mask(npu->chip_id, 0x5011039, val, PPC_BITMASK(6,11));
+	xscom_write_mask(npu->chip_id, 0x5011069, val, PPC_BITMASK(6,11));
+	xscom_write_mask(npu->chip_id, 0x5011099, val, PPC_BITMASK(6,11));
+	xscom_write_mask(npu->chip_id, 0x5011209, val, PPC_BITMASK(6,11));
+	xscom_write_mask(npu->chip_id, 0x5011239, val, PPC_BITMASK(6,11));
+	xscom_write_mask(npu->chip_id, 0x5011269, val, PPC_BITMASK(6,11));
+	xscom_write_mask(npu->chip_id, 0x5011299, val, PPC_BITMASK(6,11));
+	xscom_write_mask(npu->chip_id, 0x5011409, val, PPC_BITMASK(6,11));
+	xscom_write_mask(npu->chip_id, 0x5011439, val, PPC_BITMASK(6,11));
+	xscom_write_mask(npu->chip_id, 0x5011469, val, PPC_BITMASK(6,11));
+	xscom_write_mask(npu->chip_id, 0x5011499, val, PPC_BITMASK(6,11));
 
 	/* Reassign the BARs */
-	assign_mmio_bars(gcid, scom, reg, mm_win);
+	assign_mmio_bars(npu->chip_id, npu->xscom_base, reg, mm_win);
+	npu->regs = (void *)reg[0];
+	npu->mm_base = mm_win[0];
+	npu->mm_size = mm_win[1];
 
 	if (reg[0] && reg[1])
 		prlog(PR_INFO, "   Global MMIO BAR:  %016llx (%lldMB)\n",
@@ -1477,17 +1438,21 @@  static void npu2_probe_phb(struct dt_node *dn)
 				"ibm,ioda2-npu2-phb");
 	dt_add_property_strings(np, "device_type", "pciex");
 	dt_add_property(np, "reg", reg, sizeof(reg));
-	dt_add_property_cells(np, "ibm,phb-index", phb_index);
-	dt_add_property_cells(np, "ibm,npu-index", index);
-	dt_add_property_cells(np, "ibm,chip-id", gcid);
-	dt_add_property_cells(np, "ibm,xscom-base", scom);
-	dt_add_property_cells(np, "ibm,npcq", dn->phandle);
-	dt_add_property_cells(np, "ibm,links", links);
+	dt_add_property_cells(np, "ibm,phb-index", npu->phb_index);
+	dt_add_property_cells(np, "ibm,npu-index", npu->index);
+	dt_add_property_cells(np, "ibm,chip-id", npu->chip_id);
+	dt_add_property_cells(np, "ibm,xscom-base", npu->xscom_base);
+	dt_add_property_cells(np, "ibm,npcq", npu->dt_node->phandle);
+	dt_add_property_cells(np, "ibm,links", npu->total_devices);
 	dt_add_property(np, "ibm,mmio-window", mm_win, sizeof(mm_win));
 	dt_add_property_cells(np, "ibm,phb-diag-data-size", 0);
 
 	/* Disable fast reboot - not currently supported */
 	disable_fast_reboot("NVLink device enabled");
+
+	npu2_nvlink_create_phb(npu, np);
+
+	return 0;
 }
 
 static uint32_t npu2_populate_pcie_cap(struct npu2_dev *dev,
@@ -1949,49 +1914,25 @@  static void npu2_setup_irqs(struct npu2 *p)
 	npu2_write(p, reg, val);
 }
 
-static void npu2_create_phb(struct dt_node *dn)
+void npu2_nvlink_create_phb(struct npu2 *npu, struct dt_node *dn)
 {
-	const struct dt_property *prop;
-	struct npu2 *p;
 	struct pci_slot *slot;
-	uint32_t links;
-	void *pmem;
-
-	/* Retrieve number of devices */
-	links = dt_prop_get_u32(dn, "ibm,links");
-	pmem = zalloc(sizeof(struct npu2) + links * sizeof(struct npu2_dev));
-	assert(pmem);
-
-	/* Populate PHB */
-	p = pmem;
-	p->index = dt_prop_get_u32(dn, "ibm,phb-index");
-	p->chip_id = dt_prop_get_u32(dn, "ibm,chip-id");
-	p->xscom_base = dt_prop_get_u32(dn, "ibm,xscom-base");
-	p->total_devices = links;
-	p->regs = (void *)dt_get_address(dn, 0, NULL);
-
-	prop = dt_require_property(dn, "ibm,mmio-window", -1);
-	assert(prop->len >= (2 * sizeof(uint64_t)));
-	p->mm_base = ((const uint64_t *)prop->prop)[0];
-	p->mm_size = ((const uint64_t *)prop->prop)[1];
-
-	p->devices = pmem + sizeof(struct npu2);
 
 	/* Generic PHB */
-	p->phb_nvlink.dt_node = dn;
-	p->phb_nvlink.ops = &npu_ops;
-	p->phb_nvlink.phb_type = phb_type_npu_v2;
-	init_lock(&p->lock);
-	init_lock(&p->phb_nvlink.lock);
-	list_head_init(&p->phb_nvlink.devices);
-	list_head_init(&p->phb_nvlink.virt_devices);
-
-	npu2_setup_irqs(p);
-	npu2_populate_devices(p, dn);
-	npu2_add_interrupt_map(p, dn);
-	npu2_add_phb_properties(p);
-
-	slot = npu2_slot_create(&p->phb_nvlink);
+	npu->phb_nvlink.dt_node = dn;
+	npu->phb_nvlink.ops = &npu_ops;
+	npu->phb_nvlink.phb_type = phb_type_npu_v2;
+	init_lock(&npu->lock);
+	init_lock(&npu->phb_nvlink.lock);
+	list_head_init(&npu->phb_nvlink.devices);
+	list_head_init(&npu->phb_nvlink.virt_devices);
+
+	npu2_setup_irqs(npu);
+	npu2_populate_devices(npu, dn);
+	npu2_add_interrupt_map(npu, dn);
+	npu2_add_phb_properties(npu);
+
+	slot = npu2_slot_create(&npu->phb_nvlink);
 	if (!slot)
 	{
 		/**
@@ -2002,41 +1943,10 @@  static void npu2_create_phb(struct dt_node *dn)
 		prlog(PR_ERR, "NPU: Cannot create PHB slot\n");
 	}
 
-	pci_register_phb(&p->phb_nvlink, OPAL_DYNAMIC_PHB_ID);
-
-	npu2_init_ioda_cache(p);
-	npu2_hw_init(p);
-}
-
-void probe_npu2(void)
-{
-	struct proc_chip *chip = next_chip(NULL);
-	struct dt_node *np;
-	const char *zcal;
-
-	/* Abort if we're running on DD1 */
-	if (chip &&
-	    (chip->type == PROC_CHIP_P9_NIMBUS ||
-	     chip->type == PROC_CHIP_P9_CUMULUS) &&
-	    (chip->ec_level & 0xf0) == 0x10) {
-		prlog(PR_INFO, "NPU: DD1 not supported\n");
-		return;
-	}
-
-	/* Check for a zcal override */
-	zcal = nvram_query("nv_zcal_override");
-	if (zcal) {
-		nv_zcal_nominal = atoi(zcal);
-		prlog(PR_WARNING, "NPU: Using ZCAL impedance override = %d\n", nv_zcal_nominal);
-	}
-
-	/* Scan NPU2 XSCOM nodes */
-	dt_for_each_compatible(dt_root, np, "ibm,power9-npu")
-		npu2_probe_phb(np);
+	pci_register_phb(&npu->phb_nvlink, OPAL_DYNAMIC_PHB_ID);
 
-	/* Scan newly created PHB nodes */
-	dt_for_each_compatible(dt_root, np, "ibm,power9-npu-pciex")
-		npu2_create_phb(np);
+	npu2_init_ioda_cache(npu);
+	npu2_hw_init(npu);
 }
 
 /*
diff --git a/include/npu2.h b/include/npu2.h
index 10742031ec0f..9fdc438bb4c1 100644
--- a/include/npu2.h
+++ b/include/npu2.h
@@ -147,6 +147,7 @@  struct npu2 {
 	uint32_t	chip_id;
 	uint64_t	xscom_base;
 	void		*regs;
+	void		*ocapi_global_mmio_base;
 	uint64_t	mm_base;
 	uint64_t	mm_size;
 	uint32_t	base_lsi;
@@ -167,6 +168,7 @@  struct npu2 {
 
 	/* NVLink */
 	struct phb	phb_nvlink;
+	uint32_t	phb_index;
 };
 
 static inline struct npu2 *phb_to_npu2_nvlink(struct phb *phb)
@@ -193,6 +195,10 @@  static inline struct phb *npu2_dev_to_phb(struct npu2_dev *ndev)
 	}
 }
 
+int npu2_opencapi_init_npu(struct npu2 *npu);
+int npu2_nvlink_init_npu(struct npu2 *npu);
+void npu2_nvlink_create_phb(struct npu2 *npu, struct dt_node *dn);
+
 enum npu2_dev_type npu2_dt_link_dev_type(struct dt_node *link);
 void npu2_write_4b(struct npu2 *p, uint64_t reg, uint32_t val);
 uint32_t npu2_read_4b(struct npu2 *p, uint64_t reg);
diff --git a/include/skiboot.h b/include/skiboot.h
index bba76c12c39e..f2818b1a2326 100644
--- a/include/skiboot.h
+++ b/include/skiboot.h
@@ -201,7 +201,6 @@  extern int preload_capp_ucode(void);
 extern void preload_io_vpd(void);
 extern void probe_npu(void);
 extern void probe_npu2(void);
-extern void probe_npu2_opencapi(void);
 extern void uart_init(void);
 extern void mbox_init(void);
 extern void early_uart_init(void);