
[kernel] KVM: PPC: Book3s: Remove real mode interrupt controller hcalls handlers

Message ID 20220509071150.181250-1-aik@ozlabs.ru
State New
Series [kernel] KVM: PPC: Book3s: Remove real mode interrupt controller hcalls handlers

Commit Message

Alexey Kardashevskiy May 9, 2022, 7:11 a.m. UTC
Currently we have two sets of interrupt controller hypercall handlers,
one for real mode and one for virtual mode. This dates back to POWER8
times, when switching the MMU on was considered an expensive operation.

POWER9, however, does not have dependent threads, and the MMU is enabled
for handling hcalls, so the XIVE native or XICS-on-XIVE real mode handlers
never execute on real P9 and later CPUs.

This untemplates the handlers, keeps only the real mode handlers for
XICS native (up to POWER8), and removes the rest of the dead code.
Changes to the functions are mechanical, except for a few empty lines
added to make checkpatch.pl happy.

The list of default implemented hcalls already contains the XICS hcalls,
so no change is needed there.

This should not cause any behavioral change.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---

Minus 153 lines nevertheless.
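
For reviewers unfamiliar with the template being removed: book3s_xive_template.c
was plain C token pasting, #included by book3s_hv_rm_xive.c and book3s_xive.c
with different X_PFX and accessor macros to emit the xive_rm_* and xive_vm_*
flavours of each handler. A minimal standalone sketch of the pattern, condensed
into one file (illustrative only, not kernel code):

#include <stdio.h>

/*
 * Same two-level paste as the kernel's XGLUE/GLUE: X_PFX must be expanded
 * before the ## concatenation happens.
 */
#define XGLUE(a, b) a##b
#define GLUE(a, b) XGLUE(a, b)

/* What the template did: define a handler named after the current X_PFX. */
#define X_PFX xive_rm_
static unsigned long GLUE(X_PFX, h_xirr)(void) { return 1; } /* real mode */
#undef X_PFX

#define X_PFX xive_vm_
static unsigned long GLUE(X_PFX, h_xirr)(void) { return 2; } /* virtual mode */
#undef X_PFX

int main(void)
{
	/* Both flavours are generated from the same source text. */
	printf("%lu %lu\n", xive_rm_h_xirr(), xive_vm_h_xirr());
	return 0;
}

Untemplating simply spells the xive_vm_ names out in book3s_xive.c and deletes
the real mode flavour, which is why the changes to the function bodies are
mechanical.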


---
 arch/powerpc/kvm/Makefile               |   2 +-
 arch/powerpc/include/asm/kvm_ppc.h      |   7 -
 arch/powerpc/kvm/book3s_xive.h          |   7 -
 arch/powerpc/kvm/book3s_hv_builtin.c    |  64 ---
 arch/powerpc/kvm/book3s_hv_rm_xics.c    |   5 +
 arch/powerpc/kvm/book3s_hv_rm_xive.c    |  46 --
 arch/powerpc/kvm/book3s_xive.c          | 638 +++++++++++++++++++++++-
 arch/powerpc/kvm/book3s_xive_template.c | 636 -----------------------
 arch/powerpc/kvm/book3s_hv_rmhandlers.S |  12 +-
 9 files changed, 632 insertions(+), 785 deletions(-)
 delete mode 100644 arch/powerpc/kvm/book3s_hv_rm_xive.c
 delete mode 100644 arch/powerpc/kvm/book3s_xive_template.c

Comments

Cédric Le Goater May 10, 2022, 5:58 p.m. UTC | #1
Hello Alexey,

On 5/9/22 09:11, Alexey Kardashevskiy wrote:
> Currently we have 2 sets of interrupt controller hypercalls handlers
> for real and virtual modes, this is from POWER8 times when switching
> MMU on was considered an expensive operation.
> 
> POWER9 however does not have dependent threads and MMU is enabled for
> handling hcalls so the XIVE native 

XIVE native does not have any real-mode hcall handlers. In fact, all
are handled at the QEMU level.

> or XICS-on-XIVE real mode handlers never execute on real P9 and later CPUs.

They are not? I am surprised. It must be a "recent" change. Anyhow,
if you can remove them safely, this is good news and you should be able
to clean up some more code in the PowerNV native interface.

> 
> This untemplate the handlers and only keeps the real mode handlers for
> XICS native (up to POWER8) and remove the rest of dead code. Changes
> in functions are mechanical except few missing empty lines to make
> checkpatch.pl happy.
> 
> The default implemented hcalls list already contains XICS hcalls so
> no change there.
> 
> This should not cause any behavioral change.

In the worst case, it impacts performance a bit, but only on "old" distros
(kernel < 4.14); I doubt anyone will complain.

> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>

Acked-by: Cédric Le Goater <clg@kaod.org>

Thanks,

C.

  
> ---
>   arch/powerpc/kvm/Makefile               |   2 +-
>   arch/powerpc/include/asm/kvm_ppc.h      |   7 -
>   arch/powerpc/kvm/book3s_xive.h          |   7 -
>   arch/powerpc/kvm/book3s_hv_builtin.c    |  64 ---
>   arch/powerpc/kvm/book3s_hv_rm_xics.c    |   5 +
>   arch/powerpc/kvm/book3s_hv_rm_xive.c    |  46 --
>   arch/powerpc/kvm/book3s_xive.c          | 638 +++++++++++++++++++++++-
>   arch/powerpc/kvm/book3s_xive_template.c | 636 -----------------------
>   arch/powerpc/kvm/book3s_hv_rmhandlers.S |  12 +-
>   9 files changed, 632 insertions(+), 785 deletions(-)
>   delete mode 100644 arch/powerpc/kvm/book3s_hv_rm_xive.c
>   delete mode 100644 arch/powerpc/kvm/book3s_xive_template.c
> 
> diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
> index 8e3681a86074..f17379b0f161 100644
> --- a/arch/powerpc/kvm/Makefile
> +++ b/arch/powerpc/kvm/Makefile
> @@ -73,7 +73,7 @@ kvm-hv-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
>   	book3s_hv_tm.o
>   
>   kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
> -	book3s_hv_rm_xics.o book3s_hv_rm_xive.o
> +	book3s_hv_rm_xics.o
>   
>   kvm-book3s_64-builtin-tm-objs-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
>   	book3s_hv_tm_builtin.o
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
> index 44200a27371b..a775377a570e 100644
> --- a/arch/powerpc/include/asm/kvm_ppc.h
> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> @@ -787,13 +787,6 @@ long kvmppc_rm_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags,
>   			   unsigned long dest, unsigned long src);
>   long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
>                             unsigned long slb_v, unsigned int status, bool data);
> -unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu);
> -unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu);
> -unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server);
> -int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
> -                    unsigned long mfrr);
> -int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
> -int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
>   void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu);
>   
>   /*
> diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
> index 09d0657596c3..1e48f72e8aa5 100644
> --- a/arch/powerpc/kvm/book3s_xive.h
> +++ b/arch/powerpc/kvm/book3s_xive.h
> @@ -285,13 +285,6 @@ static inline u32 __xive_read_eq(__be32 *qpage, u32 msk, u32 *idx, u32 *toggle)
>   	return cur & 0x7fffffff;
>   }
>   
> -extern unsigned long xive_rm_h_xirr(struct kvm_vcpu *vcpu);
> -extern unsigned long xive_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server);
> -extern int xive_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
> -			 unsigned long mfrr);
> -extern int xive_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
> -extern int xive_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
> -
>   /*
>    * Common Xive routines for XICS-over-XIVE and XIVE native
>    */
> diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
> index 7e52d0beee77..88a8f6473c4e 100644
> --- a/arch/powerpc/kvm/book3s_hv_builtin.c
> +++ b/arch/powerpc/kvm/book3s_hv_builtin.c
> @@ -489,70 +489,6 @@ static long kvmppc_read_one_intr(bool *again)
>   	return kvmppc_check_passthru(xisr, xirr, again);
>   }
>   
> -#ifdef CONFIG_KVM_XICS
> -unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
> -{
> -	if (!kvmppc_xics_enabled(vcpu))
> -		return H_TOO_HARD;
> -	if (xics_on_xive())
> -		return xive_rm_h_xirr(vcpu);
> -	else
> -		return xics_rm_h_xirr(vcpu);
> -}
> -
> -unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu)
> -{
> -	if (!kvmppc_xics_enabled(vcpu))
> -		return H_TOO_HARD;
> -	vcpu->arch.regs.gpr[5] = get_tb();
> -	if (xics_on_xive())
> -		return xive_rm_h_xirr(vcpu);
> -	else
> -		return xics_rm_h_xirr(vcpu);
> -}
> -
> -unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server)
> -{
> -	if (!kvmppc_xics_enabled(vcpu))
> -		return H_TOO_HARD;
> -	if (xics_on_xive())
> -		return xive_rm_h_ipoll(vcpu, server);
> -	else
> -		return H_TOO_HARD;
> -}
> -
> -int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
> -		    unsigned long mfrr)
> -{
> -	if (!kvmppc_xics_enabled(vcpu))
> -		return H_TOO_HARD;
> -	if (xics_on_xive())
> -		return xive_rm_h_ipi(vcpu, server, mfrr);
> -	else
> -		return xics_rm_h_ipi(vcpu, server, mfrr);
> -}
> -
> -int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
> -{
> -	if (!kvmppc_xics_enabled(vcpu))
> -		return H_TOO_HARD;
> -	if (xics_on_xive())
> -		return xive_rm_h_cppr(vcpu, cppr);
> -	else
> -		return xics_rm_h_cppr(vcpu, cppr);
> -}
> -
> -int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
> -{
> -	if (!kvmppc_xics_enabled(vcpu))
> -		return H_TOO_HARD;
> -	if (xics_on_xive())
> -		return xive_rm_h_eoi(vcpu, xirr);
> -	else
> -		return xics_rm_h_eoi(vcpu, xirr);
> -}
> -#endif /* CONFIG_KVM_XICS */
> -
>   void kvmppc_bad_interrupt(struct pt_regs *regs)
>   {
>   	/*
> diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
> index 587c33fc4564..e2246b715f68 100644
> --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
> +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
> @@ -479,6 +479,11 @@ static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
>   	}
>   }
>   
> +unsigned long xics_rm_h_xirr_x(struct kvm_vcpu *vcpu)
> +{
> +	vcpu->arch.regs.gpr[5] = get_tb();
> +	return xics_rm_h_xirr(vcpu);
> +}
>   
>   unsigned long xics_rm_h_xirr(struct kvm_vcpu *vcpu)
>   {
> diff --git a/arch/powerpc/kvm/book3s_hv_rm_xive.c b/arch/powerpc/kvm/book3s_hv_rm_xive.c
> deleted file mode 100644
> index dd9880731bd6..000000000000
> --- a/arch/powerpc/kvm/book3s_hv_rm_xive.c
> +++ /dev/null
> @@ -1,46 +0,0 @@
> -// SPDX-License-Identifier: GPL-2.0
> -#include <linux/kernel.h>
> -#include <linux/kvm_host.h>
> -#include <linux/err.h>
> -#include <linux/kernel_stat.h>
> -#include <linux/pgtable.h>
> -
> -#include <asm/kvm_book3s.h>
> -#include <asm/kvm_ppc.h>
> -#include <asm/hvcall.h>
> -#include <asm/xics.h>
> -#include <asm/debug.h>
> -#include <asm/synch.h>
> -#include <asm/cputhreads.h>
> -#include <asm/ppc-opcode.h>
> -#include <asm/pnv-pci.h>
> -#include <asm/opal.h>
> -#include <asm/smp.h>
> -#include <asm/xive.h>
> -#include <asm/xive-regs.h>
> -
> -#include "book3s_xive.h"
> -
> -/* XXX */
> -#include <asm/udbg.h>
> -//#define DBG(fmt...) udbg_printf(fmt)
> -#define DBG(fmt...) do { } while(0)
> -
> -static inline void __iomem *get_tima_phys(void)
> -{
> -	return local_paca->kvm_hstate.xive_tima_phys;
> -}
> -
> -#undef XIVE_RUNTIME_CHECKS
> -#define X_PFX xive_rm_
> -#define X_STATIC
> -#define X_STAT_PFX stat_rm_
> -#define __x_tima		get_tima_phys()
> -#define __x_eoi_page(xd)	((void __iomem *)((xd)->eoi_page))
> -#define __x_trig_page(xd)	((void __iomem *)((xd)->trig_page))
> -#define __x_writeb	__raw_rm_writeb
> -#define __x_readw	__raw_rm_readw
> -#define __x_readq	__raw_rm_readq
> -#define __x_writeq	__raw_rm_writeq
> -
> -#include "book3s_xive_template.c"
> diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
> index c0ce5531d9bc..65515a96498a 100644
> --- a/arch/powerpc/kvm/book3s_xive.c
> +++ b/arch/powerpc/kvm/book3s_xive.c
> @@ -30,27 +30,629 @@
>   
>   #include "book3s_xive.h"
>   
> -
> -/*
> - * Virtual mode variants of the hcalls for use on radix/radix
> - * with AIL. They require the VCPU's VP to be "pushed"
> - *
> - * We still instantiate them here because we use some of the
> - * generated utility functions as well in this file.
> - */
> -#define XIVE_RUNTIME_CHECKS
> -#define X_PFX xive_vm_
> -#define X_STATIC static
> -#define X_STAT_PFX stat_vm_
> -#define __x_tima		xive_tima
>   #define __x_eoi_page(xd)	((void __iomem *)((xd)->eoi_mmio))
>   #define __x_trig_page(xd)	((void __iomem *)((xd)->trig_mmio))
> -#define __x_writeb	__raw_writeb
> -#define __x_readw	__raw_readw
> -#define __x_readq	__raw_readq
> -#define __x_writeq	__raw_writeq
>   
> -#include "book3s_xive_template.c"
> +/* Dummy interrupt used when taking interrupts out of a queue in H_CPPR */
> +#define XICS_DUMMY	1
> +
> +static void xive_vm_ack_pending(struct kvmppc_xive_vcpu *xc)
> +{
> +	u8 cppr;
> +	u16 ack;
> +
> +	/*
> +	 * Ensure any previous store to CPPR is ordered vs.
> +	 * the subsequent loads from PIPR or ACK.
> +	 */
> +	eieio();
> +
> +	/* Perform the acknowledge OS to register cycle. */
> +	ack = be16_to_cpu(__raw_readw(xive_tima + TM_SPC_ACK_OS_REG));
> +
> +	/* Synchronize subsequent queue accesses */
> +	mb();
> +
> +	/* XXX Check grouping level */
> +
> +	/* Anything ? */
> +	if (!((ack >> 8) & TM_QW1_NSR_EO))
> +		return;
> +
> +	/* Grab CPPR of the most favored pending interrupt */
> +	cppr = ack & 0xff;
> +	if (cppr < 8)
> +		xc->pending |= 1 << cppr;
> +
> +	/* Check consistency */
> +	if (cppr >= xc->hw_cppr)
> +		pr_warn("KVM-XIVE: CPU %d odd ack CPPR, got %d at %d\n",
> +			smp_processor_id(), cppr, xc->hw_cppr);
> +
> +	/*
> +	 * Update our image of the HW CPPR. We don't yet modify
> +	 * xc->cppr, this will be done as we scan for interrupts
> +	 * in the queues.
> +	 */
> +	xc->hw_cppr = cppr;
> +}
> +
> +static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset)
> +{
> +	u64 val;
> +
> +	if (offset == XIVE_ESB_SET_PQ_10 && xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
> +		offset |= XIVE_ESB_LD_ST_MO;
> +
> +	val = __raw_readq(__x_eoi_page(xd) + offset);
> +#ifdef __LITTLE_ENDIAN__
> +	val >>= 64-8;
> +#endif
> +	return (u8)val;
> +}
> +
> +
> +static void xive_vm_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
> +{
> +	/* If the XIVE supports the new "store EOI facility, use it */
> +	if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
> +		__raw_writeq(0, __x_eoi_page(xd) + XIVE_ESB_STORE_EOI);
> +	else if (xd->flags & XIVE_IRQ_FLAG_LSI) {
> +		/*
> +		 * For LSIs the HW EOI cycle is used rather than PQ bits,
> +		 * as they are automatically re-triggred in HW when still
> +		 * pending.
> +		 */
> +		__raw_readq(__x_eoi_page(xd) + XIVE_ESB_LOAD_EOI);
> +	} else {
> +		uint64_t eoi_val;
> +
> +		/*
> +		 * Otherwise for EOI, we use the special MMIO that does
> +		 * a clear of both P and Q and returns the old Q,
> +		 * except for LSIs where we use the "EOI cycle" special
> +		 * load.
> +		 *
> +		 * This allows us to then do a re-trigger if Q was set
> +		 * rather than synthetizing an interrupt in software
> +		 */
> +		eoi_val = xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_00);
> +
> +		/* Re-trigger if needed */
> +		if ((eoi_val & 1) && __x_trig_page(xd))
> +			__raw_writeq(0, __x_trig_page(xd));
> +	}
> +}
> +
> +enum {
> +	scan_fetch,
> +	scan_poll,
> +	scan_eoi,
> +};
> +
> +static u32 xive_vm_scan_interrupts(struct kvmppc_xive_vcpu *xc,
> +				       u8 pending, int scan_type)
> +{
> +	u32 hirq = 0;
> +	u8 prio = 0xff;
> +
> +	/* Find highest pending priority */
> +	while ((xc->mfrr != 0xff || pending != 0) && hirq == 0) {
> +		struct xive_q *q;
> +		u32 idx, toggle;
> +		__be32 *qpage;
> +
> +		/*
> +		 * If pending is 0 this will return 0xff which is what
> +		 * we want
> +		 */
> +		prio = ffs(pending) - 1;
> +
> +		/* Don't scan past the guest cppr */
> +		if (prio >= xc->cppr || prio > 7) {
> +			if (xc->mfrr < xc->cppr) {
> +				prio = xc->mfrr;
> +				hirq = XICS_IPI;
> +			}
> +			break;
> +		}
> +
> +		/* Grab queue and pointers */
> +		q = &xc->queues[prio];
> +		idx = q->idx;
> +		toggle = q->toggle;
> +
> +		/*
> +		 * Snapshot the queue page. The test further down for EOI
> +		 * must use the same "copy" that was used by __xive_read_eq
> +		 * since qpage can be set concurrently and we don't want
> +		 * to miss an EOI.
> +		 */
> +		qpage = READ_ONCE(q->qpage);
> +
> +skip_ipi:
> +		/*
> +		 * Try to fetch from the queue. Will return 0 for a
> +		 * non-queueing priority (ie, qpage = 0).
> +		 */
> +		hirq = __xive_read_eq(qpage, q->msk, &idx, &toggle);
> +
> +		/*
> +		 * If this was a signal for an MFFR change done by
> +		 * H_IPI we skip it. Additionally, if we were fetching
> +		 * we EOI it now, thus re-enabling reception of a new
> +		 * such signal.
> +		 *
> +		 * We also need to do that if prio is 0 and we had no
> +		 * page for the queue. In this case, we have non-queued
> +		 * IPI that needs to be EOId.
> +		 *
> +		 * This is safe because if we have another pending MFRR
> +		 * change that wasn't observed above, the Q bit will have
> +		 * been set and another occurrence of the IPI will trigger.
> +		 */
> +		if (hirq == XICS_IPI || (prio == 0 && !qpage)) {
> +			if (scan_type == scan_fetch) {
> +				xive_vm_source_eoi(xc->vp_ipi,
> +						       &xc->vp_ipi_data);
> +				q->idx = idx;
> +				q->toggle = toggle;
> +			}
> +			/* Loop back on same queue with updated idx/toggle */
> +			WARN_ON(hirq && hirq != XICS_IPI);
> +			if (hirq)
> +				goto skip_ipi;
> +		}
> +
> +		/* If it's the dummy interrupt, continue searching */
> +		if (hirq == XICS_DUMMY)
> +			goto skip_ipi;
> +
> +		/* Clear the pending bit if the queue is now empty */
> +		if (!hirq) {
> +			pending &= ~(1 << prio);
> +
> +			/*
> +			 * Check if the queue count needs adjusting due to
> +			 * interrupts being moved away.
> +			 */
> +			if (atomic_read(&q->pending_count)) {
> +				int p = atomic_xchg(&q->pending_count, 0);
> +
> +				if (p) {
> +					WARN_ON(p > atomic_read(&q->count));
> +					atomic_sub(p, &q->count);
> +				}
> +			}
> +		}
> +
> +		/*
> +		 * If the most favoured prio we found pending is less
> +		 * favored (or equal) than a pending IPI, we return
> +		 * the IPI instead.
> +		 */
> +		if (prio >= xc->mfrr && xc->mfrr < xc->cppr) {
> +			prio = xc->mfrr;
> +			hirq = XICS_IPI;
> +			break;
> +		}
> +
> +		/* If fetching, update queue pointers */
> +		if (scan_type == scan_fetch) {
> +			q->idx = idx;
> +			q->toggle = toggle;
> +		}
> +	}
> +
> +	/* If we are just taking a "peek", do nothing else */
> +	if (scan_type == scan_poll)
> +		return hirq;
> +
> +	/* Update the pending bits */
> +	xc->pending = pending;
> +
> +	/*
> +	 * If this is an EOI that's it, no CPPR adjustment done here,
> +	 * all we needed was cleanup the stale pending bits and check
> +	 * if there's anything left.
> +	 */
> +	if (scan_type == scan_eoi)
> +		return hirq;
> +
> +	/*
> +	 * If we found an interrupt, adjust what the guest CPPR should
> +	 * be as if we had just fetched that interrupt from HW.
> +	 *
> +	 * Note: This can only make xc->cppr smaller as the previous
> +	 * loop will only exit with hirq != 0 if prio is lower than
> +	 * the current xc->cppr. Thus we don't need to re-check xc->mfrr
> +	 * for pending IPIs.
> +	 */
> +	if (hirq)
> +		xc->cppr = prio;
> +	/*
> +	 * If it was an IPI the HW CPPR might have been lowered too much
> +	 * as the HW interrupt we use for IPIs is routed to priority 0.
> +	 *
> +	 * We re-sync it here.
> +	 */
> +	if (xc->cppr != xc->hw_cppr) {
> +		xc->hw_cppr = xc->cppr;
> +		__raw_writeb(xc->cppr, xive_tima + TM_QW1_OS + TM_CPPR);
> +	}
> +
> +	return hirq;
> +}
> +
> +static unsigned long xive_vm_h_xirr(struct kvm_vcpu *vcpu)
> +{
> +	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
> +	u8 old_cppr;
> +	u32 hirq;
> +
> +	pr_devel("H_XIRR\n");
> +
> +	xc->stat_vm_h_xirr++;
> +
> +	/* First collect pending bits from HW */
> +	xive_vm_ack_pending(xc);
> +
> +	pr_devel(" new pending=0x%02x hw_cppr=%d cppr=%d\n",
> +		 xc->pending, xc->hw_cppr, xc->cppr);
> +
> +	/* Grab previous CPPR and reverse map it */
> +	old_cppr = xive_prio_to_guest(xc->cppr);
> +
> +	/* Scan for actual interrupts */
> +	hirq = xive_vm_scan_interrupts(xc, xc->pending, scan_fetch);
> +
> +	pr_devel(" got hirq=0x%x hw_cppr=%d cppr=%d\n",
> +		 hirq, xc->hw_cppr, xc->cppr);
> +
> +	/* That should never hit */
> +	if (hirq & 0xff000000)
> +		pr_warn("XIVE: Weird guest interrupt number 0x%08x\n", hirq);
> +
> +	/*
> +	 * XXX We could check if the interrupt is masked here and
> +	 * filter it. If we chose to do so, we would need to do:
> +	 *
> +	 *    if (masked) {
> +	 *        lock();
> +	 *        if (masked) {
> +	 *            old_Q = true;
> +	 *            hirq = 0;
> +	 *        }
> +	 *        unlock();
> +	 *    }
> +	 */
> +
> +	/* Return interrupt and old CPPR in GPR4 */
> +	vcpu->arch.regs.gpr[4] = hirq | (old_cppr << 24);
> +
> +	return H_SUCCESS;
> +}
> +
> +static unsigned long xive_vm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server)
> +{
> +	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
> +	u8 pending = xc->pending;
> +	u32 hirq;
> +
> +	pr_devel("H_IPOLL(server=%ld)\n", server);
> +
> +	xc->stat_vm_h_ipoll++;
> +
> +	/* Grab the target VCPU if not the current one */
> +	if (xc->server_num != server) {
> +		vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
> +		if (!vcpu)
> +			return H_PARAMETER;
> +		xc = vcpu->arch.xive_vcpu;
> +
> +		/* Scan all priorities */
> +		pending = 0xff;
> +	} else {
> +		/* Grab pending interrupt if any */
> +		__be64 qw1 = __raw_readq(xive_tima + TM_QW1_OS);
> +		u8 pipr = be64_to_cpu(qw1) & 0xff;
> +
> +		if (pipr < 8)
> +			pending |= 1 << pipr;
> +	}
> +
> +	hirq = xive_vm_scan_interrupts(xc, pending, scan_poll);
> +
> +	/* Return interrupt and old CPPR in GPR4 */
> +	vcpu->arch.regs.gpr[4] = hirq | (xc->cppr << 24);
> +
> +	return H_SUCCESS;
> +}
> +
> +static void xive_vm_push_pending_to_hw(struct kvmppc_xive_vcpu *xc)
> +{
> +	u8 pending, prio;
> +
> +	pending = xc->pending;
> +	if (xc->mfrr != 0xff) {
> +		if (xc->mfrr < 8)
> +			pending |= 1 << xc->mfrr;
> +		else
> +			pending |= 0x80;
> +	}
> +	if (!pending)
> +		return;
> +	prio = ffs(pending) - 1;
> +
> +	__raw_writeb(prio, xive_tima + TM_SPC_SET_OS_PENDING);
> +}
> +
> +static void xive_vm_scan_for_rerouted_irqs(struct kvmppc_xive *xive,
> +					       struct kvmppc_xive_vcpu *xc)
> +{
> +	unsigned int prio;
> +
> +	/* For each priority that is now masked */
> +	for (prio = xc->cppr; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
> +		struct xive_q *q = &xc->queues[prio];
> +		struct kvmppc_xive_irq_state *state;
> +		struct kvmppc_xive_src_block *sb;
> +		u32 idx, toggle, entry, irq, hw_num;
> +		struct xive_irq_data *xd;
> +		__be32 *qpage;
> +		u16 src;
> +
> +		idx = q->idx;
> +		toggle = q->toggle;
> +		qpage = READ_ONCE(q->qpage);
> +		if (!qpage)
> +			continue;
> +
> +		/* For each interrupt in the queue */
> +		for (;;) {
> +			entry = be32_to_cpup(qpage + idx);
> +
> +			/* No more ? */
> +			if ((entry >> 31) == toggle)
> +				break;
> +			irq = entry & 0x7fffffff;
> +
> +			/* Skip dummies and IPIs */
> +			if (irq == XICS_DUMMY || irq == XICS_IPI)
> +				goto next;
> +			sb = kvmppc_xive_find_source(xive, irq, &src);
> +			if (!sb)
> +				goto next;
> +			state = &sb->irq_state[src];
> +
> +			/* Has it been rerouted ? */
> +			if (xc->server_num == state->act_server)
> +				goto next;
> +
> +			/*
> +			 * Allright, it *has* been re-routed, kill it from
> +			 * the queue.
> +			 */
> +			qpage[idx] = cpu_to_be32((entry & 0x80000000) | XICS_DUMMY);
> +
> +			/* Find the HW interrupt */
> +			kvmppc_xive_select_irq(state, &hw_num, &xd);
> +
> +			/* If it's not an LSI, set PQ to 11 the EOI will force a resend */
> +			if (!(xd->flags & XIVE_IRQ_FLAG_LSI))
> +				xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_11);
> +
> +			/* EOI the source */
> +			xive_vm_source_eoi(hw_num, xd);
> +
> +next:
> +			idx = (idx + 1) & q->msk;
> +			if (idx == 0)
> +				toggle ^= 1;
> +		}
> +	}
> +}
> +
> +static int xive_vm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
> +{
> +	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
> +	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
> +	u8 old_cppr;
> +
> +	pr_devel("H_CPPR(cppr=%ld)\n", cppr);
> +
> +	xc->stat_vm_h_cppr++;
> +
> +	/* Map CPPR */
> +	cppr = xive_prio_from_guest(cppr);
> +
> +	/* Remember old and update SW state */
> +	old_cppr = xc->cppr;
> +	xc->cppr = cppr;
> +
> +	/*
> +	 * Order the above update of xc->cppr with the subsequent
> +	 * read of xc->mfrr inside push_pending_to_hw()
> +	 */
> +	smp_mb();
> +
> +	if (cppr > old_cppr) {
> +		/*
> +		 * We are masking less, we need to look for pending things
> +		 * to deliver and set VP pending bits accordingly to trigger
> +		 * a new interrupt otherwise we might miss MFRR changes for
> +		 * which we have optimized out sending an IPI signal.
> +		 */
> +		xive_vm_push_pending_to_hw(xc);
> +	} else {
> +		/*
> +		 * We are masking more, we need to check the queue for any
> +		 * interrupt that has been routed to another CPU, take
> +		 * it out (replace it with the dummy) and retrigger it.
> +		 *
> +		 * This is necessary since those interrupts may otherwise
> +		 * never be processed, at least not until this CPU restores
> +		 * its CPPR.
> +		 *
> +		 * This is in theory racy vs. HW adding new interrupts to
> +		 * the queue. In practice this works because the interesting
> +		 * cases are when the guest has done a set_xive() to move the
> +		 * interrupt away, which flushes the xive, followed by the
> +		 * target CPU doing a H_CPPR. So any new interrupt coming into
> +		 * the queue must still be routed to us and isn't a source
> +		 * of concern.
> +		 */
> +		xive_vm_scan_for_rerouted_irqs(xive, xc);
> +	}
> +
> +	/* Apply new CPPR */
> +	xc->hw_cppr = cppr;
> +	__raw_writeb(cppr, xive_tima + TM_QW1_OS + TM_CPPR);
> +
> +	return H_SUCCESS;
> +}
> +
> +static int xive_vm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
> +{
> +	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
> +	struct kvmppc_xive_src_block *sb;
> +	struct kvmppc_xive_irq_state *state;
> +	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
> +	struct xive_irq_data *xd;
> +	u8 new_cppr = xirr >> 24;
> +	u32 irq = xirr & 0x00ffffff, hw_num;
> +	u16 src;
> +	int rc = 0;
> +
> +	pr_devel("H_EOI(xirr=%08lx)\n", xirr);
> +
> +	xc->stat_vm_h_eoi++;
> +
> +	xc->cppr = xive_prio_from_guest(new_cppr);
> +
> +	/*
> +	 * IPIs are synthetized from MFRR and thus don't need
> +	 * any special EOI handling. The underlying interrupt
> +	 * used to signal MFRR changes is EOId when fetched from
> +	 * the queue.
> +	 */
> +	if (irq == XICS_IPI || irq == 0) {
> +		/*
> +		 * This barrier orders the setting of xc->cppr vs.
> +		 * subsquent test of xc->mfrr done inside
> +		 * scan_interrupts and push_pending_to_hw
> +		 */
> +		smp_mb();
> +		goto bail;
> +	}
> +
> +	/* Find interrupt source */
> +	sb = kvmppc_xive_find_source(xive, irq, &src);
> +	if (!sb) {
> +		pr_devel(" source not found !\n");
> +		rc = H_PARAMETER;
> +		/* Same as above */
> +		smp_mb();
> +		goto bail;
> +	}
> +	state = &sb->irq_state[src];
> +	kvmppc_xive_select_irq(state, &hw_num, &xd);
> +
> +	state->in_eoi = true;
> +
> +	/*
> +	 * This barrier orders both setting of in_eoi above vs,
> +	 * subsequent test of guest_priority, and the setting
> +	 * of xc->cppr vs. subsquent test of xc->mfrr done inside
> +	 * scan_interrupts and push_pending_to_hw
> +	 */
> +	smp_mb();
> +
> +again:
> +	if (state->guest_priority == MASKED) {
> +		arch_spin_lock(&sb->lock);
> +		if (state->guest_priority != MASKED) {
> +			arch_spin_unlock(&sb->lock);
> +			goto again;
> +		}
> +		pr_devel(" EOI on saved P...\n");
> +
> +		/* Clear old_p, that will cause unmask to perform an EOI */
> +		state->old_p = false;
> +
> +		arch_spin_unlock(&sb->lock);
> +	} else {
> +		pr_devel(" EOI on source...\n");
> +
> +		/* Perform EOI on the source */
> +		xive_vm_source_eoi(hw_num, xd);
> +
> +		/* If it's an emulated LSI, check level and resend */
> +		if (state->lsi && state->asserted)
> +			__raw_writeq(0, __x_trig_page(xd));
> +
> +	}
> +
> +	/*
> +	 * This barrier orders the above guest_priority check
> +	 * and spin_lock/unlock with clearing in_eoi below.
> +	 *
> +	 * It also has to be a full mb() as it must ensure
> +	 * the MMIOs done in source_eoi() are completed before
> +	 * state->in_eoi is visible.
> +	 */
> +	mb();
> +	state->in_eoi = false;
> +bail:
> +
> +	/* Re-evaluate pending IRQs and update HW */
> +	xive_vm_scan_interrupts(xc, xc->pending, scan_eoi);
> +	xive_vm_push_pending_to_hw(xc);
> +	pr_devel(" after scan pending=%02x\n", xc->pending);
> +
> +	/* Apply new CPPR */
> +	xc->hw_cppr = xc->cppr;
> +	__raw_writeb(xc->cppr, xive_tima + TM_QW1_OS + TM_CPPR);
> +
> +	return rc;
> +}
> +
> +static int xive_vm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
> +			       unsigned long mfrr)
> +{
> +	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
> +
> +	pr_devel("H_IPI(server=%08lx,mfrr=%ld)\n", server, mfrr);
> +
> +	xc->stat_vm_h_ipi++;
> +
> +	/* Find target */
> +	vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
> +	if (!vcpu)
> +		return H_PARAMETER;
> +	xc = vcpu->arch.xive_vcpu;
> +
> +	/* Locklessly write over MFRR */
> +	xc->mfrr = mfrr;
> +
> +	/*
> +	 * The load of xc->cppr below and the subsequent MMIO store
> +	 * to the IPI must happen after the above mfrr update is
> +	 * globally visible so that:
> +	 *
> +	 * - Synchronize with another CPU doing an H_EOI or a H_CPPR
> +	 *   updating xc->cppr then reading xc->mfrr.
> +	 *
> +	 * - The target of the IPI sees the xc->mfrr update
> +	 */
> +	mb();
> +
> +	/* Shoot the IPI if most favored than target cppr */
> +	if (mfrr < xc->cppr)
> +		__raw_writeq(0, __x_trig_page(&xc->vp_ipi_data));
> +
> +	return H_SUCCESS;
> +}
>   
>   /*
>    * We leave a gap of a couple of interrupts in the queue to
> diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c
> deleted file mode 100644
> index b0015e05d99a..000000000000
> --- a/arch/powerpc/kvm/book3s_xive_template.c
> +++ /dev/null
> @@ -1,636 +0,0 @@
> -// SPDX-License-Identifier: GPL-2.0-only
> -/*
> - * Copyright 2017 Benjamin Herrenschmidt, IBM Corporation
> - */
> -
> -/* File to be included by other .c files */
> -
> -#define XGLUE(a,b) a##b
> -#define GLUE(a,b) XGLUE(a,b)
> -
> -/* Dummy interrupt used when taking interrupts out of a queue in H_CPPR */
> -#define XICS_DUMMY	1
> -
> -static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc)
> -{
> -	u8 cppr;
> -	u16 ack;
> -
> -	/*
> -	 * Ensure any previous store to CPPR is ordered vs.
> -	 * the subsequent loads from PIPR or ACK.
> -	 */
> -	eieio();
> -
> -	/* Perform the acknowledge OS to register cycle. */
> -	ack = be16_to_cpu(__x_readw(__x_tima + TM_SPC_ACK_OS_REG));
> -
> -	/* Synchronize subsequent queue accesses */
> -	mb();
> -
> -	/* XXX Check grouping level */
> -
> -	/* Anything ? */
> -	if (!((ack >> 8) & TM_QW1_NSR_EO))
> -		return;
> -
> -	/* Grab CPPR of the most favored pending interrupt */
> -	cppr = ack & 0xff;
> -	if (cppr < 8)
> -		xc->pending |= 1 << cppr;
> -
> -#ifdef XIVE_RUNTIME_CHECKS
> -	/* Check consistency */
> -	if (cppr >= xc->hw_cppr)
> -		pr_warn("KVM-XIVE: CPU %d odd ack CPPR, got %d at %d\n",
> -			smp_processor_id(), cppr, xc->hw_cppr);
> -#endif
> -
> -	/*
> -	 * Update our image of the HW CPPR. We don't yet modify
> -	 * xc->cppr, this will be done as we scan for interrupts
> -	 * in the queues.
> -	 */
> -	xc->hw_cppr = cppr;
> -}
> -
> -static u8 GLUE(X_PFX,esb_load)(struct xive_irq_data *xd, u32 offset)
> -{
> -	u64 val;
> -
> -	if (offset == XIVE_ESB_SET_PQ_10 && xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
> -		offset |= XIVE_ESB_LD_ST_MO;
> -
> -	val =__x_readq(__x_eoi_page(xd) + offset);
> -#ifdef __LITTLE_ENDIAN__
> -	val >>= 64-8;
> -#endif
> -	return (u8)val;
> -}
> -
> -
> -static void GLUE(X_PFX,source_eoi)(u32 hw_irq, struct xive_irq_data *xd)
> -{
> -	/* If the XIVE supports the new "store EOI facility, use it */
> -	if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
> -		__x_writeq(0, __x_eoi_page(xd) + XIVE_ESB_STORE_EOI);
> -	else if (xd->flags & XIVE_IRQ_FLAG_LSI) {
> -		/*
> -		 * For LSIs the HW EOI cycle is used rather than PQ bits,
> -		 * as they are automatically re-triggred in HW when still
> -		 * pending.
> -		 */
> -		__x_readq(__x_eoi_page(xd) + XIVE_ESB_LOAD_EOI);
> -	} else {
> -		uint64_t eoi_val;
> -
> -		/*
> -		 * Otherwise for EOI, we use the special MMIO that does
> -		 * a clear of both P and Q and returns the old Q,
> -		 * except for LSIs where we use the "EOI cycle" special
> -		 * load.
> -		 *
> -		 * This allows us to then do a re-trigger if Q was set
> -		 * rather than synthetizing an interrupt in software
> -		 */
> -		eoi_val = GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_00);
> -
> -		/* Re-trigger if needed */
> -		if ((eoi_val & 1) && __x_trig_page(xd))
> -			__x_writeq(0, __x_trig_page(xd));
> -	}
> -}
> -
> -enum {
> -	scan_fetch,
> -	scan_poll,
> -	scan_eoi,
> -};
> -
> -static u32 GLUE(X_PFX,scan_interrupts)(struct kvmppc_xive_vcpu *xc,
> -				       u8 pending, int scan_type)
> -{
> -	u32 hirq = 0;
> -	u8 prio = 0xff;
> -
> -	/* Find highest pending priority */
> -	while ((xc->mfrr != 0xff || pending != 0) && hirq == 0) {
> -		struct xive_q *q;
> -		u32 idx, toggle;
> -		__be32 *qpage;
> -
> -		/*
> -		 * If pending is 0 this will return 0xff which is what
> -		 * we want
> -		 */
> -		prio = ffs(pending) - 1;
> -
> -		/* Don't scan past the guest cppr */
> -		if (prio >= xc->cppr || prio > 7) {
> -			if (xc->mfrr < xc->cppr) {
> -				prio = xc->mfrr;
> -				hirq = XICS_IPI;
> -			}
> -			break;
> -		}
> -
> -		/* Grab queue and pointers */
> -		q = &xc->queues[prio];
> -		idx = q->idx;
> -		toggle = q->toggle;
> -
> -		/*
> -		 * Snapshot the queue page. The test further down for EOI
> -		 * must use the same "copy" that was used by __xive_read_eq
> -		 * since qpage can be set concurrently and we don't want
> -		 * to miss an EOI.
> -		 */
> -		qpage = READ_ONCE(q->qpage);
> -
> -skip_ipi:
> -		/*
> -		 * Try to fetch from the queue. Will return 0 for a
> -		 * non-queueing priority (ie, qpage = 0).
> -		 */
> -		hirq = __xive_read_eq(qpage, q->msk, &idx, &toggle);
> -
> -		/*
> -		 * If this was a signal for an MFFR change done by
> -		 * H_IPI we skip it. Additionally, if we were fetching
> -		 * we EOI it now, thus re-enabling reception of a new
> -		 * such signal.
> -		 *
> -		 * We also need to do that if prio is 0 and we had no
> -		 * page for the queue. In this case, we have non-queued
> -		 * IPI that needs to be EOId.
> -		 *
> -		 * This is safe because if we have another pending MFRR
> -		 * change that wasn't observed above, the Q bit will have
> -		 * been set and another occurrence of the IPI will trigger.
> -		 */
> -		if (hirq == XICS_IPI || (prio == 0 && !qpage)) {
> -			if (scan_type == scan_fetch) {
> -				GLUE(X_PFX,source_eoi)(xc->vp_ipi,
> -						       &xc->vp_ipi_data);
> -				q->idx = idx;
> -				q->toggle = toggle;
> -			}
> -			/* Loop back on same queue with updated idx/toggle */
> -#ifdef XIVE_RUNTIME_CHECKS
> -			WARN_ON(hirq && hirq != XICS_IPI);
> -#endif
> -			if (hirq)
> -				goto skip_ipi;
> -		}
> -
> -		/* If it's the dummy interrupt, continue searching */
> -		if (hirq == XICS_DUMMY)
> -			goto skip_ipi;
> -
> -		/* Clear the pending bit if the queue is now empty */
> -		if (!hirq) {
> -			pending &= ~(1 << prio);
> -
> -			/*
> -			 * Check if the queue count needs adjusting due to
> -			 * interrupts being moved away.
> -			 */
> -			if (atomic_read(&q->pending_count)) {
> -				int p = atomic_xchg(&q->pending_count, 0);
> -				if (p) {
> -#ifdef XIVE_RUNTIME_CHECKS
> -					WARN_ON(p > atomic_read(&q->count));
> -#endif
> -					atomic_sub(p, &q->count);
> -				}
> -			}
> -		}
> -
> -		/*
> -		 * If the most favoured prio we found pending is less
> -		 * favored (or equal) than a pending IPI, we return
> -		 * the IPI instead.
> -		 */
> -		if (prio >= xc->mfrr && xc->mfrr < xc->cppr) {
> -			prio = xc->mfrr;
> -			hirq = XICS_IPI;
> -			break;
> -		}
> -
> -		/* If fetching, update queue pointers */
> -		if (scan_type == scan_fetch) {
> -			q->idx = idx;
> -			q->toggle = toggle;
> -		}
> -	}
> -
> -	/* If we are just taking a "peek", do nothing else */
> -	if (scan_type == scan_poll)
> -		return hirq;
> -
> -	/* Update the pending bits */
> -	xc->pending = pending;
> -
> -	/*
> -	 * If this is an EOI that's it, no CPPR adjustment done here,
> -	 * all we needed was cleanup the stale pending bits and check
> -	 * if there's anything left.
> -	 */
> -	if (scan_type == scan_eoi)
> -		return hirq;
> -
> -	/*
> -	 * If we found an interrupt, adjust what the guest CPPR should
> -	 * be as if we had just fetched that interrupt from HW.
> -	 *
> -	 * Note: This can only make xc->cppr smaller as the previous
> -	 * loop will only exit with hirq != 0 if prio is lower than
> -	 * the current xc->cppr. Thus we don't need to re-check xc->mfrr
> -	 * for pending IPIs.
> -	 */
> -	if (hirq)
> -		xc->cppr = prio;
> -	/*
> -	 * If it was an IPI the HW CPPR might have been lowered too much
> -	 * as the HW interrupt we use for IPIs is routed to priority 0.
> -	 *
> -	 * We re-sync it here.
> -	 */
> -	if (xc->cppr != xc->hw_cppr) {
> -		xc->hw_cppr = xc->cppr;
> -		__x_writeb(xc->cppr, __x_tima + TM_QW1_OS + TM_CPPR);
> -	}
> -
> -	return hirq;
> -}
> -
> -X_STATIC unsigned long GLUE(X_PFX,h_xirr)(struct kvm_vcpu *vcpu)
> -{
> -	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
> -	u8 old_cppr;
> -	u32 hirq;
> -
> -	pr_devel("H_XIRR\n");
> -
> -	xc->GLUE(X_STAT_PFX,h_xirr)++;
> -
> -	/* First collect pending bits from HW */
> -	GLUE(X_PFX,ack_pending)(xc);
> -
> -	pr_devel(" new pending=0x%02x hw_cppr=%d cppr=%d\n",
> -		 xc->pending, xc->hw_cppr, xc->cppr);
> -
> -	/* Grab previous CPPR and reverse map it */
> -	old_cppr = xive_prio_to_guest(xc->cppr);
> -
> -	/* Scan for actual interrupts */
> -	hirq = GLUE(X_PFX,scan_interrupts)(xc, xc->pending, scan_fetch);
> -
> -	pr_devel(" got hirq=0x%x hw_cppr=%d cppr=%d\n",
> -		 hirq, xc->hw_cppr, xc->cppr);
> -
> -#ifdef XIVE_RUNTIME_CHECKS
> -	/* That should never hit */
> -	if (hirq & 0xff000000)
> -		pr_warn("XIVE: Weird guest interrupt number 0x%08x\n", hirq);
> -#endif
> -
> -	/*
> -	 * XXX We could check if the interrupt is masked here and
> -	 * filter it. If we chose to do so, we would need to do:
> -	 *
> -	 *    if (masked) {
> -	 *        lock();
> -	 *        if (masked) {
> -	 *            old_Q = true;
> -	 *            hirq = 0;
> -	 *        }
> -	 *        unlock();
> -	 *    }
> -	 */
> -
> -	/* Return interrupt and old CPPR in GPR4 */
> -	vcpu->arch.regs.gpr[4] = hirq | (old_cppr << 24);
> -
> -	return H_SUCCESS;
> -}
> -
> -X_STATIC unsigned long GLUE(X_PFX,h_ipoll)(struct kvm_vcpu *vcpu, unsigned long server)
> -{
> -	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
> -	u8 pending = xc->pending;
> -	u32 hirq;
> -
> -	pr_devel("H_IPOLL(server=%ld)\n", server);
> -
> -	xc->GLUE(X_STAT_PFX,h_ipoll)++;
> -
> -	/* Grab the target VCPU if not the current one */
> -	if (xc->server_num != server) {
> -		vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
> -		if (!vcpu)
> -			return H_PARAMETER;
> -		xc = vcpu->arch.xive_vcpu;
> -
> -		/* Scan all priorities */
> -		pending = 0xff;
> -	} else {
> -		/* Grab pending interrupt if any */
> -		__be64 qw1 = __x_readq(__x_tima + TM_QW1_OS);
> -		u8 pipr = be64_to_cpu(qw1) & 0xff;
> -		if (pipr < 8)
> -			pending |= 1 << pipr;
> -	}
> -
> -	hirq = GLUE(X_PFX,scan_interrupts)(xc, pending, scan_poll);
> -
> -	/* Return interrupt and old CPPR in GPR4 */
> -	vcpu->arch.regs.gpr[4] = hirq | (xc->cppr << 24);
> -
> -	return H_SUCCESS;
> -}
> -
> -static void GLUE(X_PFX,push_pending_to_hw)(struct kvmppc_xive_vcpu *xc)
> -{
> -	u8 pending, prio;
> -
> -	pending = xc->pending;
> -	if (xc->mfrr != 0xff) {
> -		if (xc->mfrr < 8)
> -			pending |= 1 << xc->mfrr;
> -		else
> -			pending |= 0x80;
> -	}
> -	if (!pending)
> -		return;
> -	prio = ffs(pending) - 1;
> -
> -	__x_writeb(prio, __x_tima + TM_SPC_SET_OS_PENDING);
> -}
> -
> -static void GLUE(X_PFX,scan_for_rerouted_irqs)(struct kvmppc_xive *xive,
> -					       struct kvmppc_xive_vcpu *xc)
> -{
> -	unsigned int prio;
> -
> -	/* For each priority that is now masked */
> -	for (prio = xc->cppr; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
> -		struct xive_q *q = &xc->queues[prio];
> -		struct kvmppc_xive_irq_state *state;
> -		struct kvmppc_xive_src_block *sb;
> -		u32 idx, toggle, entry, irq, hw_num;
> -		struct xive_irq_data *xd;
> -		__be32 *qpage;
> -		u16 src;
> -
> -		idx = q->idx;
> -		toggle = q->toggle;
> -		qpage = READ_ONCE(q->qpage);
> -		if (!qpage)
> -			continue;
> -
> -		/* For each interrupt in the queue */
> -		for (;;) {
> -			entry = be32_to_cpup(qpage + idx);
> -
> -			/* No more ? */
> -			if ((entry >> 31) == toggle)
> -				break;
> -			irq = entry & 0x7fffffff;
> -
> -			/* Skip dummies and IPIs */
> -			if (irq == XICS_DUMMY || irq == XICS_IPI)
> -				goto next;
> -			sb = kvmppc_xive_find_source(xive, irq, &src);
> -			if (!sb)
> -				goto next;
> -			state = &sb->irq_state[src];
> -
> -			/* Has it been rerouted ? */
> -			if (xc->server_num == state->act_server)
> -				goto next;
> -
> -			/*
> -			 * Allright, it *has* been re-routed, kill it from
> -			 * the queue.
> -			 */
> -			qpage[idx] = cpu_to_be32((entry & 0x80000000) | XICS_DUMMY);
> -
> -			/* Find the HW interrupt */
> -			kvmppc_xive_select_irq(state, &hw_num, &xd);
> -
> -			/* If it's not an LSI, set PQ to 11 the EOI will force a resend */
> -			if (!(xd->flags & XIVE_IRQ_FLAG_LSI))
> -				GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_11);
> -
> -			/* EOI the source */
> -			GLUE(X_PFX,source_eoi)(hw_num, xd);
> -
> -		next:
> -			idx = (idx + 1) & q->msk;
> -			if (idx == 0)
> -				toggle ^= 1;
> -		}
> -	}
> -}
> -
> -X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
> -{
> -	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
> -	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
> -	u8 old_cppr;
> -
> -	pr_devel("H_CPPR(cppr=%ld)\n", cppr);
> -
> -	xc->GLUE(X_STAT_PFX,h_cppr)++;
> -
> -	/* Map CPPR */
> -	cppr = xive_prio_from_guest(cppr);
> -
> -	/* Remember old and update SW state */
> -	old_cppr = xc->cppr;
> -	xc->cppr = cppr;
> -
> -	/*
> -	 * Order the above update of xc->cppr with the subsequent
> -	 * read of xc->mfrr inside push_pending_to_hw()
> -	 */
> -	smp_mb();
> -
> -	if (cppr > old_cppr) {
> -		/*
> -		 * We are masking less, we need to look for pending things
> -		 * to deliver and set VP pending bits accordingly to trigger
> -		 * a new interrupt otherwise we might miss MFRR changes for
> -		 * which we have optimized out sending an IPI signal.
> -		 */
> -		GLUE(X_PFX,push_pending_to_hw)(xc);
> -	} else {
> -		/*
> -		 * We are masking more, we need to check the queue for any
> -		 * interrupt that has been routed to another CPU, take
> -		 * it out (replace it with the dummy) and retrigger it.
> -		 *
> -		 * This is necessary since those interrupts may otherwise
> -		 * never be processed, at least not until this CPU restores
> -		 * its CPPR.
> -		 *
> -		 * This is in theory racy vs. HW adding new interrupts to
> -		 * the queue. In practice this works because the interesting
> -		 * cases are when the guest has done a set_xive() to move the
> -		 * interrupt away, which flushes the xive, followed by the
> -		 * target CPU doing a H_CPPR. So any new interrupt coming into
> -		 * the queue must still be routed to us and isn't a source
> -		 * of concern.
> -		 */
> -		GLUE(X_PFX,scan_for_rerouted_irqs)(xive, xc);
> -	}
> -
> -	/* Apply new CPPR */
> -	xc->hw_cppr = cppr;
> -	__x_writeb(cppr, __x_tima + TM_QW1_OS + TM_CPPR);
> -
> -	return H_SUCCESS;
> -}
> -
> -X_STATIC int GLUE(X_PFX,h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr)
> -{
> -	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
> -	struct kvmppc_xive_src_block *sb;
> -	struct kvmppc_xive_irq_state *state;
> -	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
> -	struct xive_irq_data *xd;
> -	u8 new_cppr = xirr >> 24;
> -	u32 irq = xirr & 0x00ffffff, hw_num;
> -	u16 src;
> -	int rc = 0;
> -
> -	pr_devel("H_EOI(xirr=%08lx)\n", xirr);
> -
> -	xc->GLUE(X_STAT_PFX,h_eoi)++;
> -
> -	xc->cppr = xive_prio_from_guest(new_cppr);
> -
> -	/*
> -	 * IPIs are synthetized from MFRR and thus don't need
> -	 * any special EOI handling. The underlying interrupt
> -	 * used to signal MFRR changes is EOId when fetched from
> -	 * the queue.
> -	 */
> -	if (irq == XICS_IPI || irq == 0) {
> -		/*
> -		 * This barrier orders the setting of xc->cppr vs.
> -		 * subsquent test of xc->mfrr done inside
> -		 * scan_interrupts and push_pending_to_hw
> -		 */
> -		smp_mb();
> -		goto bail;
> -	}
> -
> -	/* Find interrupt source */
> -	sb = kvmppc_xive_find_source(xive, irq, &src);
> -	if (!sb) {
> -		pr_devel(" source not found !\n");
> -		rc = H_PARAMETER;
> -		/* Same as above */
> -		smp_mb();
> -		goto bail;
> -	}
> -	state = &sb->irq_state[src];
> -	kvmppc_xive_select_irq(state, &hw_num, &xd);
> -
> -	state->in_eoi = true;
> -
> -	/*
> -	 * This barrier orders both setting of in_eoi above vs,
> -	 * subsequent test of guest_priority, and the setting
> -	 * of xc->cppr vs. subsquent test of xc->mfrr done inside
> -	 * scan_interrupts and push_pending_to_hw
> -	 */
> -	smp_mb();
> -
> -again:
> -	if (state->guest_priority == MASKED) {
> -		arch_spin_lock(&sb->lock);
> -		if (state->guest_priority != MASKED) {
> -			arch_spin_unlock(&sb->lock);
> -			goto again;
> -		}
> -		pr_devel(" EOI on saved P...\n");
> -
> -		/* Clear old_p, that will cause unmask to perform an EOI */
> -		state->old_p = false;
> -
> -		arch_spin_unlock(&sb->lock);
> -	} else {
> -		pr_devel(" EOI on source...\n");
> -
> -		/* Perform EOI on the source */
> -		GLUE(X_PFX,source_eoi)(hw_num, xd);
> -
> -		/* If it's an emulated LSI, check level and resend */
> -		if (state->lsi && state->asserted)
> -			__x_writeq(0, __x_trig_page(xd));
> -
> -	}
> -
> -	/*
> -	 * This barrier orders the above guest_priority check
> -	 * and spin_lock/unlock with clearing in_eoi below.
> -	 *
> -	 * It also has to be a full mb() as it must ensure
> -	 * the MMIOs done in source_eoi() are completed before
> -	 * state->in_eoi is visible.
> -	 */
> -	mb();
> -	state->in_eoi = false;
> -bail:
> -
> -	/* Re-evaluate pending IRQs and update HW */
> -	GLUE(X_PFX,scan_interrupts)(xc, xc->pending, scan_eoi);
> -	GLUE(X_PFX,push_pending_to_hw)(xc);
> -	pr_devel(" after scan pending=%02x\n", xc->pending);
> -
> -	/* Apply new CPPR */
> -	xc->hw_cppr = xc->cppr;
> -	__x_writeb(xc->cppr, __x_tima + TM_QW1_OS + TM_CPPR);
> -
> -	return rc;
> -}
> -
> -X_STATIC int GLUE(X_PFX,h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
> -			       unsigned long mfrr)
> -{
> -	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
> -
> -	pr_devel("H_IPI(server=%08lx,mfrr=%ld)\n", server, mfrr);
> -
> -	xc->GLUE(X_STAT_PFX,h_ipi)++;
> -
> -	/* Find target */
> -	vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
> -	if (!vcpu)
> -		return H_PARAMETER;
> -	xc = vcpu->arch.xive_vcpu;
> -
> -	/* Locklessly write over MFRR */
> -	xc->mfrr = mfrr;
> -
> -	/*
> -	 * The load of xc->cppr below and the subsequent MMIO store
> -	 * to the IPI must happen after the above mfrr update is
> -	 * globally visible so that:
> -	 *
> -	 * - Synchronize with another CPU doing an H_EOI or a H_CPPR
> -	 *   updating xc->cppr then reading xc->mfrr.
> -	 *
> -	 * - The target of the IPI sees the xc->mfrr update
> -	 */
> -	mb();
> -
> -	/* Shoot the IPI if most favored than target cppr */
> -	if (mfrr < xc->cppr)
> -		__x_writeq(0, __x_trig_page(&xc->vp_ipi_data));
> -
> -	return H_SUCCESS;
> -}
> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> index 44d74bfe05df..5003563ca38f 100644
> --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> @@ -1803,11 +1803,11 @@ hcall_real_table:
>   	.long	0		/* 0x5c */
>   	.long	0		/* 0x60 */
>   #ifdef CONFIG_KVM_XICS
> -	.long	DOTSYM(kvmppc_rm_h_eoi) - hcall_real_table
> -	.long	DOTSYM(kvmppc_rm_h_cppr) - hcall_real_table
> -	.long	DOTSYM(kvmppc_rm_h_ipi) - hcall_real_table
> -	.long	DOTSYM(kvmppc_rm_h_ipoll) - hcall_real_table
> -	.long	DOTSYM(kvmppc_rm_h_xirr) - hcall_real_table
> +	.long	DOTSYM(xics_rm_h_eoi) - hcall_real_table
> +	.long	DOTSYM(xics_rm_h_cppr) - hcall_real_table
> +	.long	DOTSYM(xics_rm_h_ipi) - hcall_real_table
> +	.long	0		/* 0x70 - H_IPOLL */
> +	.long	DOTSYM(xics_rm_h_xirr) - hcall_real_table
>   #else
>   	.long	0		/* 0x64 - H_EOI */
>   	.long	0		/* 0x68 - H_CPPR */
> @@ -1977,7 +1977,7 @@ hcall_real_table:
>   	.long	0		/* 0x2f4 */
>   	.long	0		/* 0x2f8 */
>   #ifdef CONFIG_KVM_XICS
> -	.long	DOTSYM(kvmppc_rm_h_xirr_x) - hcall_real_table
> +	.long	DOTSYM(xics_rm_h_xirr_x) - hcall_real_table
>   #else
>   	.long	0		/* 0x2fc - H_XIRR_X*/
>   #endif
Alexey Kardashevskiy May 11, 2022, 1:23 a.m. UTC | #2
On 5/11/22 03:58, Cédric Le Goater wrote:
> Hello Alexey,
> 
> On 5/9/22 09:11, Alexey Kardashevskiy wrote:
>> Currently we have 2 sets of interrupt controller hypercalls handlers
>> for real and virtual modes, this is from POWER8 times when switching
>> MMU on was considered an expensive operation.
>>
>> POWER9 however does not have dependent threads and MMU is enabled for
>> handling hcalls so the XIVE native 
> 
> XIVE native does not have any real-mode hcall handlers. In fact, all
> are handled at the QEMU level.
> 
>> or XICS-on-XIVE real mode handlers never execute on real P9 and
>> later CPUs.
> 
> They are not ? I am surprised. It must be a "recent" change. Any how,
> if you can remove them safely, this is good news and you should be able
> to clean up some more code in the PowerNV native interface.


Yes, this is the result of Nick's massive work to move KVM's P9 asm to C.
It could have been the case even before that, but it was harder to see in
that asm code :)
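
Concretely, with the P9 entry path in C, these hcalls now reach KVM with the
MMU already on and are dispatched from book3s_xive.c straight to the
(previously templated) xive_vm_* handlers. A rough sketch of that dispatch,
reconstructed from memory rather than quoted from the tree, so treat the
exact shape as an assumption:

int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
{
	/*
	 * Sketch only: error/edge handling of the in-tree version (and the
	 * H_XIRR_X case) is omitted here; the point is where the xive_vm_*
	 * handlers are called from.
	 */
	switch (req) {
	case H_XIRR:
		return xive_vm_h_xirr(vcpu);
	case H_CPPR:
		return xive_vm_h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4));
	case H_EOI:
		return xive_vm_h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4));
	case H_IPI:
		return xive_vm_h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4),
				     kvmppc_get_gpr(vcpu, 5));
	case H_IPOLL:
		return xive_vm_h_ipoll(vcpu, kvmppc_get_gpr(vcpu, 4));
	default:
		return H_FUNCTION;
	}
}

So hcall_real_table only matters on the P8 asm path, which is why its XIVE
entries and the kvmppc_rm_h_* wrappers can go.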


>>
>> This untemplate the handlers and only keeps the real mode handlers for
>> XICS native (up to POWER8) and remove the rest of dead code. Changes
>> in functions are mechanical except few missing empty lines to make
>> checkpatch.pl happy.
>>
>> The default implemented hcalls list already contains XICS hcalls so
>> no change there.
>>
>> This should not cause any behavioral change.
> 
> In the worse case, it impacts performance a bit but only on "old" distros
> (kernel < 4.14), I doubt anyone will complain.
> 
>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> 
> Acked-by: Cédric Le Goater <clg@kaod.org>


Thanks!

> 
> Thanks,
> 
> C.
> 
> 
>> ---
>>   arch/powerpc/kvm/Makefile               |   2 +-
>>   arch/powerpc/include/asm/kvm_ppc.h      |   7 -
>>   arch/powerpc/kvm/book3s_xive.h          |   7 -
>>   arch/powerpc/kvm/book3s_hv_builtin.c    |  64 ---
>>   arch/powerpc/kvm/book3s_hv_rm_xics.c    |   5 +
>>   arch/powerpc/kvm/book3s_hv_rm_xive.c    |  46 --
>>   arch/powerpc/kvm/book3s_xive.c          | 638 +++++++++++++++++++++++-
>>   arch/powerpc/kvm/book3s_xive_template.c | 636 -----------------------
>>   arch/powerpc/kvm/book3s_hv_rmhandlers.S |  12 +-
>>   9 files changed, 632 insertions(+), 785 deletions(-)
>>   delete mode 100644 arch/powerpc/kvm/book3s_hv_rm_xive.c
>>   delete mode 100644 arch/powerpc/kvm/book3s_xive_template.c
>>
>> diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
>> index 8e3681a86074..f17379b0f161 100644
>> --- a/arch/powerpc/kvm/Makefile
>> +++ b/arch/powerpc/kvm/Makefile
>> @@ -73,7 +73,7 @@ kvm-hv-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
>>       book3s_hv_tm.o
>>   kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
>> -    book3s_hv_rm_xics.o book3s_hv_rm_xive.o
>> +    book3s_hv_rm_xics.o
>>   kvm-book3s_64-builtin-tm-objs-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
>>       book3s_hv_tm_builtin.o
>> diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
>> b/arch/powerpc/include/asm/kvm_ppc.h
>> index 44200a27371b..a775377a570e 100644
>> --- a/arch/powerpc/include/asm/kvm_ppc.h
>> +++ b/arch/powerpc/include/asm/kvm_ppc.h
>> @@ -787,13 +787,6 @@ long kvmppc_rm_h_page_init(struct kvm_vcpu *vcpu, 
>> unsigned long flags,
>>                  unsigned long dest, unsigned long src);
>>   long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
>>                             unsigned long slb_v, unsigned int status, 
>> bool data);
>> -unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu);
>> -unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu);
>> -unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long 
>> server);
>> -int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
>> -                    unsigned long mfrr);
>> -int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
>> -int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
>>   void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu);
>>   /*
>> diff --git a/arch/powerpc/kvm/book3s_xive.h 
>> b/arch/powerpc/kvm/book3s_xive.h
>> index 09d0657596c3..1e48f72e8aa5 100644
>> --- a/arch/powerpc/kvm/book3s_xive.h
>> +++ b/arch/powerpc/kvm/book3s_xive.h
>> @@ -285,13 +285,6 @@ static inline u32 __xive_read_eq(__be32 *qpage, 
>> u32 msk, u32 *idx, u32 *toggle)
>>       return cur & 0x7fffffff;
>>   }
>> -extern unsigned long xive_rm_h_xirr(struct kvm_vcpu *vcpu);
>> -extern unsigned long xive_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned 
>> long server);
>> -extern int xive_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
>> -             unsigned long mfrr);
>> -extern int xive_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
>> -extern int xive_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
>> -
>>   /*
>>    * Common Xive routines for XICS-over-XIVE and XIVE native
>>    */
>> diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c 
>> b/arch/powerpc/kvm/book3s_hv_builtin.c
>> index 7e52d0beee77..88a8f6473c4e 100644
>> --- a/arch/powerpc/kvm/book3s_hv_builtin.c
>> +++ b/arch/powerpc/kvm/book3s_hv_builtin.c
>> @@ -489,70 +489,6 @@ static long kvmppc_read_one_intr(bool *again)
>>       return kvmppc_check_passthru(xisr, xirr, again);
>>   }
>> -#ifdef CONFIG_KVM_XICS
>> -unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
>> -{
>> -    if (!kvmppc_xics_enabled(vcpu))
>> -        return H_TOO_HARD;
>> -    if (xics_on_xive())
>> -        return xive_rm_h_xirr(vcpu);
>> -    else
>> -        return xics_rm_h_xirr(vcpu);
>> -}
>> -
>> -unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu)
>> -{
>> -    if (!kvmppc_xics_enabled(vcpu))
>> -        return H_TOO_HARD;
>> -    vcpu->arch.regs.gpr[5] = get_tb();
>> -    if (xics_on_xive())
>> -        return xive_rm_h_xirr(vcpu);
>> -    else
>> -        return xics_rm_h_xirr(vcpu);
>> -}
>> -
>> -unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long 
>> server)
>> -{
>> -    if (!kvmppc_xics_enabled(vcpu))
>> -        return H_TOO_HARD;
>> -    if (xics_on_xive())
>> -        return xive_rm_h_ipoll(vcpu, server);
>> -    else
>> -        return H_TOO_HARD;
>> -}
>> -
>> -int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
>> -            unsigned long mfrr)
>> -{
>> -    if (!kvmppc_xics_enabled(vcpu))
>> -        return H_TOO_HARD;
>> -    if (xics_on_xive())
>> -        return xive_rm_h_ipi(vcpu, server, mfrr);
>> -    else
>> -        return xics_rm_h_ipi(vcpu, server, mfrr);
>> -}
>> -
>> -int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
>> -{
>> -    if (!kvmppc_xics_enabled(vcpu))
>> -        return H_TOO_HARD;
>> -    if (xics_on_xive())
>> -        return xive_rm_h_cppr(vcpu, cppr);
>> -    else
>> -        return xics_rm_h_cppr(vcpu, cppr);
>> -}
>> -
>> -int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
>> -{
>> -    if (!kvmppc_xics_enabled(vcpu))
>> -        return H_TOO_HARD;
>> -    if (xics_on_xive())
>> -        return xive_rm_h_eoi(vcpu, xirr);
>> -    else
>> -        return xics_rm_h_eoi(vcpu, xirr);
>> -}
>> -#endif /* CONFIG_KVM_XICS */
>> -
>>   void kvmppc_bad_interrupt(struct pt_regs *regs)
>>   {
>>       /*
>> diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c 
>> b/arch/powerpc/kvm/book3s_hv_rm_xics.c
>> index 587c33fc4564..e2246b715f68 100644
>> --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
>> +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
>> @@ -479,6 +479,11 @@ static void icp_rm_down_cppr(struct kvmppc_xics 
>> *xics, struct kvmppc_icp *icp,
>>       }
>>   }
>> +unsigned long xics_rm_h_xirr_x(struct kvm_vcpu *vcpu)
>> +{
>> +    vcpu->arch.regs.gpr[5] = get_tb();
>> +    return xics_rm_h_xirr(vcpu);
>> +}
>>   unsigned long xics_rm_h_xirr(struct kvm_vcpu *vcpu)
>>   {
>> diff --git a/arch/powerpc/kvm/book3s_hv_rm_xive.c 
>> b/arch/powerpc/kvm/book3s_hv_rm_xive.c
>> deleted file mode 100644
>> index dd9880731bd6..000000000000
>> --- a/arch/powerpc/kvm/book3s_hv_rm_xive.c
>> +++ /dev/null
>> @@ -1,46 +0,0 @@
>> -// SPDX-License-Identifier: GPL-2.0
>> -#include <linux/kernel.h>
>> -#include <linux/kvm_host.h>
>> -#include <linux/err.h>
>> -#include <linux/kernel_stat.h>
>> -#include <linux/pgtable.h>
>> -
>> -#include <asm/kvm_book3s.h>
>> -#include <asm/kvm_ppc.h>
>> -#include <asm/hvcall.h>
>> -#include <asm/xics.h>
>> -#include <asm/debug.h>
>> -#include <asm/synch.h>
>> -#include <asm/cputhreads.h>
>> -#include <asm/ppc-opcode.h>
>> -#include <asm/pnv-pci.h>
>> -#include <asm/opal.h>
>> -#include <asm/smp.h>
>> -#include <asm/xive.h>
>> -#include <asm/xive-regs.h>
>> -
>> -#include "book3s_xive.h"
>> -
>> -/* XXX */
>> -#include <asm/udbg.h>
>> -//#define DBG(fmt...) udbg_printf(fmt)
>> -#define DBG(fmt...) do { } while(0)
>> -
>> -static inline void __iomem *get_tima_phys(void)
>> -{
>> -    return local_paca->kvm_hstate.xive_tima_phys;
>> -}
>> -
>> -#undef XIVE_RUNTIME_CHECKS
>> -#define X_PFX xive_rm_
>> -#define X_STATIC
>> -#define X_STAT_PFX stat_rm_
>> -#define __x_tima        get_tima_phys()
>> -#define __x_eoi_page(xd)    ((void __iomem *)((xd)->eoi_page))
>> -#define __x_trig_page(xd)    ((void __iomem *)((xd)->trig_page))
>> -#define __x_writeb    __raw_rm_writeb
>> -#define __x_readw    __raw_rm_readw
>> -#define __x_readq    __raw_rm_readq
>> -#define __x_writeq    __raw_rm_writeq
>> -
>> -#include "book3s_xive_template.c"
>> diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
>> index c0ce5531d9bc..65515a96498a 100644
>> --- a/arch/powerpc/kvm/book3s_xive.c
>> +++ b/arch/powerpc/kvm/book3s_xive.c
>> @@ -30,27 +30,629 @@
>>   #include "book3s_xive.h"
>> -
>> -/*
>> - * Virtual mode variants of the hcalls for use on radix/radix
>> - * with AIL. They require the VCPU's VP to be "pushed"
>> - *
>> - * We still instantiate them here because we use some of the
>> - * generated utility functions as well in this file.
>> - */
>> -#define XIVE_RUNTIME_CHECKS
>> -#define X_PFX xive_vm_
>> -#define X_STATIC static
>> -#define X_STAT_PFX stat_vm_
>> -#define __x_tima        xive_tima
>>   #define __x_eoi_page(xd)    ((void __iomem *)((xd)->eoi_mmio))
>>   #define __x_trig_page(xd)    ((void __iomem *)((xd)->trig_mmio))
>> -#define __x_writeb    __raw_writeb
>> -#define __x_readw    __raw_readw
>> -#define __x_readq    __raw_readq
>> -#define __x_writeq    __raw_writeq
>> -#include "book3s_xive_template.c"
>> +/* Dummy interrupt used when taking interrupts out of a queue in H_CPPR */
>> +#define XICS_DUMMY    1
>> +
>> +static void xive_vm_ack_pending(struct kvmppc_xive_vcpu *xc)
>> +{
>> +    u8 cppr;
>> +    u16 ack;
>> +
>> +    /*
>> +     * Ensure any previous store to CPPR is ordered vs.
>> +     * the subsequent loads from PIPR or ACK.
>> +     */
>> +    eieio();
>> +
>> +    /* Perform the acknowledge OS to register cycle. */
>> +    ack = be16_to_cpu(__raw_readw(xive_tima + TM_SPC_ACK_OS_REG));
>> +
>> +    /* Synchronize subsequent queue accesses */
>> +    mb();
>> +
>> +    /* XXX Check grouping level */
>> +
>> +    /* Anything ? */
>> +    if (!((ack >> 8) & TM_QW1_NSR_EO))
>> +        return;
>> +
>> +    /* Grab CPPR of the most favored pending interrupt */
>> +    cppr = ack & 0xff;
>> +    if (cppr < 8)
>> +        xc->pending |= 1 << cppr;
>> +
>> +    /* Check consistency */
>> +    if (cppr >= xc->hw_cppr)
>> +        pr_warn("KVM-XIVE: CPU %d odd ack CPPR, got %d at %d\n",
>> +            smp_processor_id(), cppr, xc->hw_cppr);
>> +
>> +    /*
>> +     * Update our image of the HW CPPR. We don't yet modify
>> +     * xc->cppr, this will be done as we scan for interrupts
>> +     * in the queues.
>> +     */
>> +    xc->hw_cppr = cppr;
>> +}
>> +
>> +static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset)
>> +{
>> +    u64 val;
>> +
>> +    if (offset == XIVE_ESB_SET_PQ_10 && xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
>> +        offset |= XIVE_ESB_LD_ST_MO;
>> +
>> +    val = __raw_readq(__x_eoi_page(xd) + offset);
>> +#ifdef __LITTLE_ENDIAN__
>> +    val >>= 64-8;
>> +#endif
>> +    return (u8)val;
>> +}
>> +
>> +
>> +static void xive_vm_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
>> +{
>> +    /* If the XIVE supports the new "store EOI facility, use it */
>> +    if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
>> +        __raw_writeq(0, __x_eoi_page(xd) + XIVE_ESB_STORE_EOI);
>> +    else if (xd->flags & XIVE_IRQ_FLAG_LSI) {
>> +        /*
>> +         * For LSIs the HW EOI cycle is used rather than PQ bits,
>> +         * as they are automatically re-triggred in HW when still
>> +         * pending.
>> +         */
>> +        __raw_readq(__x_eoi_page(xd) + XIVE_ESB_LOAD_EOI);
>> +    } else {
>> +        uint64_t eoi_val;
>> +
>> +        /*
>> +         * Otherwise for EOI, we use the special MMIO that does
>> +         * a clear of both P and Q and returns the old Q,
>> +         * except for LSIs where we use the "EOI cycle" special
>> +         * load.
>> +         *
>> +         * This allows us to then do a re-trigger if Q was set
>> +         * rather than synthetizing an interrupt in software
>> +         */
>> +        eoi_val = xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_00);
>> +
>> +        /* Re-trigger if needed */
>> +        if ((eoi_val & 1) && __x_trig_page(xd))
>> +            __raw_writeq(0, __x_trig_page(xd));
>> +    }
>> +}
>> +
>> +enum {
>> +    scan_fetch,
>> +    scan_poll,
>> +    scan_eoi,
>> +};
>> +
>> +static u32 xive_vm_scan_interrupts(struct kvmppc_xive_vcpu *xc,
>> +                       u8 pending, int scan_type)
>> +{
>> +    u32 hirq = 0;
>> +    u8 prio = 0xff;
>> +
>> +    /* Find highest pending priority */
>> +    while ((xc->mfrr != 0xff || pending != 0) && hirq == 0) {
>> +        struct xive_q *q;
>> +        u32 idx, toggle;
>> +        __be32 *qpage;
>> +
>> +        /*
>> +         * If pending is 0 this will return 0xff which is what
>> +         * we want
>> +         */
>> +        prio = ffs(pending) - 1;
>> +
>> +        /* Don't scan past the guest cppr */
>> +        if (prio >= xc->cppr || prio > 7) {
>> +            if (xc->mfrr < xc->cppr) {
>> +                prio = xc->mfrr;
>> +                hirq = XICS_IPI;
>> +            }
>> +            break;
>> +        }
>> +
>> +        /* Grab queue and pointers */
>> +        q = &xc->queues[prio];
>> +        idx = q->idx;
>> +        toggle = q->toggle;
>> +
>> +        /*
>> +         * Snapshot the queue page. The test further down for EOI
>> +         * must use the same "copy" that was used by __xive_read_eq
>> +         * since qpage can be set concurrently and we don't want
>> +         * to miss an EOI.
>> +         */
>> +        qpage = READ_ONCE(q->qpage);
>> +
>> +skip_ipi:
>> +        /*
>> +         * Try to fetch from the queue. Will return 0 for a
>> +         * non-queueing priority (ie, qpage = 0).
>> +         */
>> +        hirq = __xive_read_eq(qpage, q->msk, &idx, &toggle);
>> +
>> +        /*
>> +         * If this was a signal for an MFFR change done by
>> +         * H_IPI we skip it. Additionally, if we were fetching
>> +         * we EOI it now, thus re-enabling reception of a new
>> +         * such signal.
>> +         *
>> +         * We also need to do that if prio is 0 and we had no
>> +         * page for the queue. In this case, we have non-queued
>> +         * IPI that needs to be EOId.
>> +         *
>> +         * This is safe because if we have another pending MFRR
>> +         * change that wasn't observed above, the Q bit will have
>> +         * been set and another occurrence of the IPI will trigger.
>> +         */
>> +        if (hirq == XICS_IPI || (prio == 0 && !qpage)) {
>> +            if (scan_type == scan_fetch) {
>> +                xive_vm_source_eoi(xc->vp_ipi,
>> +                               &xc->vp_ipi_data);
>> +                q->idx = idx;
>> +                q->toggle = toggle;
>> +            }
>> +            /* Loop back on same queue with updated idx/toggle */
>> +            WARN_ON(hirq && hirq != XICS_IPI);
>> +            if (hirq)
>> +                goto skip_ipi;
>> +        }
>> +
>> +        /* If it's the dummy interrupt, continue searching */
>> +        if (hirq == XICS_DUMMY)
>> +            goto skip_ipi;
>> +
>> +        /* Clear the pending bit if the queue is now empty */
>> +        if (!hirq) {
>> +            pending &= ~(1 << prio);
>> +
>> +            /*
>> +             * Check if the queue count needs adjusting due to
>> +             * interrupts being moved away.
>> +             */
>> +            if (atomic_read(&q->pending_count)) {
>> +                int p = atomic_xchg(&q->pending_count, 0);
>> +
>> +                if (p) {
>> +                    WARN_ON(p > atomic_read(&q->count));
>> +                    atomic_sub(p, &q->count);
>> +                }
>> +            }
>> +        }
>> +
>> +        /*
>> +         * If the most favoured prio we found pending is less
>> +         * favored (or equal) than a pending IPI, we return
>> +         * the IPI instead.
>> +         */
>> +        if (prio >= xc->mfrr && xc->mfrr < xc->cppr) {
>> +            prio = xc->mfrr;
>> +            hirq = XICS_IPI;
>> +            break;
>> +        }
>> +
>> +        /* If fetching, update queue pointers */
>> +        if (scan_type == scan_fetch) {
>> +            q->idx = idx;
>> +            q->toggle = toggle;
>> +        }
>> +    }
>> +
>> +    /* If we are just taking a "peek", do nothing else */
>> +    if (scan_type == scan_poll)
>> +        return hirq;
>> +
>> +    /* Update the pending bits */
>> +    xc->pending = pending;
>> +
>> +    /*
>> +     * If this is an EOI that's it, no CPPR adjustment done here,
>> +     * all we needed was cleanup the stale pending bits and check
>> +     * if there's anything left.
>> +     */
>> +    if (scan_type == scan_eoi)
>> +        return hirq;
>> +
>> +    /*
>> +     * If we found an interrupt, adjust what the guest CPPR should
>> +     * be as if we had just fetched that interrupt from HW.
>> +     *
>> +     * Note: This can only make xc->cppr smaller as the previous
>> +     * loop will only exit with hirq != 0 if prio is lower than
>> +     * the current xc->cppr. Thus we don't need to re-check xc->mfrr
>> +     * for pending IPIs.
>> +     */
>> +    if (hirq)
>> +        xc->cppr = prio;
>> +    /*
>> +     * If it was an IPI the HW CPPR might have been lowered too much
>> +     * as the HW interrupt we use for IPIs is routed to priority 0.
>> +     *
>> +     * We re-sync it here.
>> +     */
>> +    if (xc->cppr != xc->hw_cppr) {
>> +        xc->hw_cppr = xc->cppr;
>> +        __raw_writeb(xc->cppr, xive_tima + TM_QW1_OS + TM_CPPR);
>> +    }
>> +
>> +    return hirq;
>> +}
>> +
>> +static unsigned long xive_vm_h_xirr(struct kvm_vcpu *vcpu)
>> +{
>> +    struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
>> +    u8 old_cppr;
>> +    u32 hirq;
>> +
>> +    pr_devel("H_XIRR\n");
>> +
>> +    xc->stat_vm_h_xirr++;
>> +
>> +    /* First collect pending bits from HW */
>> +    xive_vm_ack_pending(xc);
>> +
>> +    pr_devel(" new pending=0x%02x hw_cppr=%d cppr=%d\n",
>> +         xc->pending, xc->hw_cppr, xc->cppr);
>> +
>> +    /* Grab previous CPPR and reverse map it */
>> +    old_cppr = xive_prio_to_guest(xc->cppr);
>> +
>> +    /* Scan for actual interrupts */
>> +    hirq = xive_vm_scan_interrupts(xc, xc->pending, scan_fetch);
>> +
>> +    pr_devel(" got hirq=0x%x hw_cppr=%d cppr=%d\n",
>> +         hirq, xc->hw_cppr, xc->cppr);
>> +
>> +    /* That should never hit */
>> +    if (hirq & 0xff000000)
>> +        pr_warn("XIVE: Weird guest interrupt number 0x%08x\n", hirq);
>> +
>> +    /*
>> +     * XXX We could check if the interrupt is masked here and
>> +     * filter it. If we chose to do so, we would need to do:
>> +     *
>> +     *    if (masked) {
>> +     *        lock();
>> +     *        if (masked) {
>> +     *            old_Q = true;
>> +     *            hirq = 0;
>> +     *        }
>> +     *        unlock();
>> +     *    }
>> +     */
>> +
>> +    /* Return interrupt and old CPPR in GPR4 */
>> +    vcpu->arch.regs.gpr[4] = hirq | (old_cppr << 24);
>> +
>> +    return H_SUCCESS;
>> +}
>> +
>> +static unsigned long xive_vm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server)
>> +{
>> +    struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
>> +    u8 pending = xc->pending;
>> +    u32 hirq;
>> +
>> +    pr_devel("H_IPOLL(server=%ld)\n", server);
>> +
>> +    xc->stat_vm_h_ipoll++;
>> +
>> +    /* Grab the target VCPU if not the current one */
>> +    if (xc->server_num != server) {
>> +        vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
>> +        if (!vcpu)
>> +            return H_PARAMETER;
>> +        xc = vcpu->arch.xive_vcpu;
>> +
>> +        /* Scan all priorities */
>> +        pending = 0xff;
>> +    } else {
>> +        /* Grab pending interrupt if any */
>> +        __be64 qw1 = __raw_readq(xive_tima + TM_QW1_OS);
>> +        u8 pipr = be64_to_cpu(qw1) & 0xff;
>> +
>> +        if (pipr < 8)
>> +            pending |= 1 << pipr;
>> +    }
>> +
>> +    hirq = xive_vm_scan_interrupts(xc, pending, scan_poll);
>> +
>> +    /* Return interrupt and old CPPR in GPR4 */
>> +    vcpu->arch.regs.gpr[4] = hirq | (xc->cppr << 24);
>> +
>> +    return H_SUCCESS;
>> +}
>> +
>> +static void xive_vm_push_pending_to_hw(struct kvmppc_xive_vcpu *xc)
>> +{
>> +    u8 pending, prio;
>> +
>> +    pending = xc->pending;
>> +    if (xc->mfrr != 0xff) {
>> +        if (xc->mfrr < 8)
>> +            pending |= 1 << xc->mfrr;
>> +        else
>> +            pending |= 0x80;
>> +    }
>> +    if (!pending)
>> +        return;
>> +    prio = ffs(pending) - 1;
>> +
>> +    __raw_writeb(prio, xive_tima + TM_SPC_SET_OS_PENDING);
>> +}
>> +
>> +static void xive_vm_scan_for_rerouted_irqs(struct kvmppc_xive *xive,
>> +                           struct kvmppc_xive_vcpu *xc)
>> +{
>> +    unsigned int prio;
>> +
>> +    /* For each priority that is now masked */
>> +    for (prio = xc->cppr; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
>> +        struct xive_q *q = &xc->queues[prio];
>> +        struct kvmppc_xive_irq_state *state;
>> +        struct kvmppc_xive_src_block *sb;
>> +        u32 idx, toggle, entry, irq, hw_num;
>> +        struct xive_irq_data *xd;
>> +        __be32 *qpage;
>> +        u16 src;
>> +
>> +        idx = q->idx;
>> +        toggle = q->toggle;
>> +        qpage = READ_ONCE(q->qpage);
>> +        if (!qpage)
>> +            continue;
>> +
>> +        /* For each interrupt in the queue */
>> +        for (;;) {
>> +            entry = be32_to_cpup(qpage + idx);
>> +
>> +            /* No more ? */
>> +            if ((entry >> 31) == toggle)
>> +                break;
>> +            irq = entry & 0x7fffffff;
>> +
>> +            /* Skip dummies and IPIs */
>> +            if (irq == XICS_DUMMY || irq == XICS_IPI)
>> +                goto next;
>> +            sb = kvmppc_xive_find_source(xive, irq, &src);
>> +            if (!sb)
>> +                goto next;
>> +            state = &sb->irq_state[src];
>> +
>> +            /* Has it been rerouted ? */
>> +            if (xc->server_num == state->act_server)
>> +                goto next;
>> +
>> +            /*
>> +             * Allright, it *has* been re-routed, kill it from
>> +             * the queue.
>> +             */
>> +            qpage[idx] = cpu_to_be32((entry & 0x80000000) | XICS_DUMMY);
>> +
>> +            /* Find the HW interrupt */
>> +            kvmppc_xive_select_irq(state, &hw_num, &xd);
>> +
>> +            /* If it's not an LSI, set PQ to 11 the EOI will force a resend */
>> +            if (!(xd->flags & XIVE_IRQ_FLAG_LSI))
>> +                xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_11);
>> +
>> +            /* EOI the source */
>> +            xive_vm_source_eoi(hw_num, xd);
>> +
>> +next:
>> +            idx = (idx + 1) & q->msk;
>> +            if (idx == 0)
>> +                toggle ^= 1;
>> +        }
>> +    }
>> +}
>> +
>> +static int xive_vm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
>> +{
>> +    struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
>> +    struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
>> +    u8 old_cppr;
>> +
>> +    pr_devel("H_CPPR(cppr=%ld)\n", cppr);
>> +
>> +    xc->stat_vm_h_cppr++;
>> +
>> +    /* Map CPPR */
>> +    cppr = xive_prio_from_guest(cppr);
>> +
>> +    /* Remember old and update SW state */
>> +    old_cppr = xc->cppr;
>> +    xc->cppr = cppr;
>> +
>> +    /*
>> +     * Order the above update of xc->cppr with the subsequent
>> +     * read of xc->mfrr inside push_pending_to_hw()
>> +     */
>> +    smp_mb();
>> +
>> +    if (cppr > old_cppr) {
>> +        /*
>> +         * We are masking less, we need to look for pending things
>> +         * to deliver and set VP pending bits accordingly to trigger
>> +         * a new interrupt otherwise we might miss MFRR changes for
>> +         * which we have optimized out sending an IPI signal.
>> +         */
>> +        xive_vm_push_pending_to_hw(xc);
>> +    } else {
>> +        /*
>> +         * We are masking more, we need to check the queue for any
>> +         * interrupt that has been routed to another CPU, take
>> +         * it out (replace it with the dummy) and retrigger it.
>> +         *
>> +         * This is necessary since those interrupts may otherwise
>> +         * never be processed, at least not until this CPU restores
>> +         * its CPPR.
>> +         *
>> +         * This is in theory racy vs. HW adding new interrupts to
>> +         * the queue. In practice this works because the interesting
>> +         * cases are when the guest has done a set_xive() to move the
>> +         * interrupt away, which flushes the xive, followed by the
>> +         * target CPU doing a H_CPPR. So any new interrupt coming into
>> +         * the queue must still be routed to us and isn't a source
>> +         * of concern.
>> +         */
>> +        xive_vm_scan_for_rerouted_irqs(xive, xc);
>> +    }
>> +
>> +    /* Apply new CPPR */
>> +    xc->hw_cppr = cppr;
>> +    __raw_writeb(cppr, xive_tima + TM_QW1_OS + TM_CPPR);
>> +
>> +    return H_SUCCESS;
>> +}
>> +
>> +static int xive_vm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
>> +{
>> +    struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
>> +    struct kvmppc_xive_src_block *sb;
>> +    struct kvmppc_xive_irq_state *state;
>> +    struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
>> +    struct xive_irq_data *xd;
>> +    u8 new_cppr = xirr >> 24;
>> +    u32 irq = xirr & 0x00ffffff, hw_num;
>> +    u16 src;
>> +    int rc = 0;
>> +
>> +    pr_devel("H_EOI(xirr=%08lx)\n", xirr);
>> +
>> +    xc->stat_vm_h_eoi++;
>> +
>> +    xc->cppr = xive_prio_from_guest(new_cppr);
>> +
>> +    /*
>> +     * IPIs are synthetized from MFRR and thus don't need
>> +     * any special EOI handling. The underlying interrupt
>> +     * used to signal MFRR changes is EOId when fetched from
>> +     * the queue.
>> +     */
>> +    if (irq == XICS_IPI || irq == 0) {
>> +        /*
>> +         * This barrier orders the setting of xc->cppr vs.
>> +         * subsquent test of xc->mfrr done inside
>> +         * scan_interrupts and push_pending_to_hw
>> +         */
>> +        smp_mb();
>> +        goto bail;
>> +    }
>> +
>> +    /* Find interrupt source */
>> +    sb = kvmppc_xive_find_source(xive, irq, &src);
>> +    if (!sb) {
>> +        pr_devel(" source not found !\n");
>> +        rc = H_PARAMETER;
>> +        /* Same as above */
>> +        smp_mb();
>> +        goto bail;
>> +    }
>> +    state = &sb->irq_state[src];
>> +    kvmppc_xive_select_irq(state, &hw_num, &xd);
>> +
>> +    state->in_eoi = true;
>> +
>> +    /*
>> +     * This barrier orders both setting of in_eoi above vs,
>> +     * subsequent test of guest_priority, and the setting
>> +     * of xc->cppr vs. subsquent test of xc->mfrr done inside
>> +     * scan_interrupts and push_pending_to_hw
>> +     */
>> +    smp_mb();
>> +
>> +again:
>> +    if (state->guest_priority == MASKED) {
>> +        arch_spin_lock(&sb->lock);
>> +        if (state->guest_priority != MASKED) {
>> +            arch_spin_unlock(&sb->lock);
>> +            goto again;
>> +        }
>> +        pr_devel(" EOI on saved P...\n");
>> +
>> +        /* Clear old_p, that will cause unmask to perform an EOI */
>> +        state->old_p = false;
>> +
>> +        arch_spin_unlock(&sb->lock);
>> +    } else {
>> +        pr_devel(" EOI on source...\n");
>> +
>> +        /* Perform EOI on the source */
>> +        xive_vm_source_eoi(hw_num, xd);
>> +
>> +        /* If it's an emulated LSI, check level and resend */
>> +        if (state->lsi && state->asserted)
>> +            __raw_writeq(0, __x_trig_page(xd));
>> +
>> +    }
>> +
>> +    /*
>> +     * This barrier orders the above guest_priority check
>> +     * and spin_lock/unlock with clearing in_eoi below.
>> +     *
>> +     * It also has to be a full mb() as it must ensure
>> +     * the MMIOs done in source_eoi() are completed before
>> +     * state->in_eoi is visible.
>> +     */
>> +    mb();
>> +    state->in_eoi = false;
>> +bail:
>> +
>> +    /* Re-evaluate pending IRQs and update HW */
>> +    xive_vm_scan_interrupts(xc, xc->pending, scan_eoi);
>> +    xive_vm_push_pending_to_hw(xc);
>> +    pr_devel(" after scan pending=%02x\n", xc->pending);
>> +
>> +    /* Apply new CPPR */
>> +    xc->hw_cppr = xc->cppr;
>> +    __raw_writeb(xc->cppr, xive_tima + TM_QW1_OS + TM_CPPR);
>> +
>> +    return rc;
>> +}
>> +
>> +static int xive_vm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
>> +                   unsigned long mfrr)
>> +{
>> +    struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
>> +
>> +    pr_devel("H_IPI(server=%08lx,mfrr=%ld)\n", server, mfrr);
>> +
>> +    xc->stat_vm_h_ipi++;
>> +
>> +    /* Find target */
>> +    vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
>> +    if (!vcpu)
>> +        return H_PARAMETER;
>> +    xc = vcpu->arch.xive_vcpu;
>> +
>> +    /* Locklessly write over MFRR */
>> +    xc->mfrr = mfrr;
>> +
>> +    /*
>> +     * The load of xc->cppr below and the subsequent MMIO store
>> +     * to the IPI must happen after the above mfrr update is
>> +     * globally visible so that:
>> +     *
>> +     * - Synchronize with another CPU doing an H_EOI or a H_CPPR
>> +     *   updating xc->cppr then reading xc->mfrr.
>> +     *
>> +     * - The target of the IPI sees the xc->mfrr update
>> +     */
>> +    mb();
>> +
>> +    /* Shoot the IPI if most favored than target cppr */
>> +    if (mfrr < xc->cppr)
>> +        __raw_writeq(0, __x_trig_page(&xc->vp_ipi_data));
>> +
>> +    return H_SUCCESS;
>> +}
>>   /*
>>    * We leave a gap of a couple of interrupts in the queue to
>> diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c
>> deleted file mode 100644
>> index b0015e05d99a..000000000000
>> --- a/arch/powerpc/kvm/book3s_xive_template.c
>> +++ /dev/null
>> @@ -1,636 +0,0 @@
>> -// SPDX-License-Identifier: GPL-2.0-only
>> -/*
>> - * Copyright 2017 Benjamin Herrenschmidt, IBM Corporation
>> - */
>> -
>> -/* File to be included by other .c files */
>> -
>> -#define XGLUE(a,b) a##b
>> -#define GLUE(a,b) XGLUE(a,b)
>> -
>> -/* Dummy interrupt used when taking interrupts out of a queue in H_CPPR */
>> -#define XICS_DUMMY    1
>> -
>> -static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc)
>> -{
>> -    u8 cppr;
>> -    u16 ack;
>> -
>> -    /*
>> -     * Ensure any previous store to CPPR is ordered vs.
>> -     * the subsequent loads from PIPR or ACK.
>> -     */
>> -    eieio();
>> -
>> -    /* Perform the acknowledge OS to register cycle. */
>> -    ack = be16_to_cpu(__x_readw(__x_tima + TM_SPC_ACK_OS_REG));
>> -
>> -    /* Synchronize subsequent queue accesses */
>> -    mb();
>> -
>> -    /* XXX Check grouping level */
>> -
>> -    /* Anything ? */
>> -    if (!((ack >> 8) & TM_QW1_NSR_EO))
>> -        return;
>> -
>> -    /* Grab CPPR of the most favored pending interrupt */
>> -    cppr = ack & 0xff;
>> -    if (cppr < 8)
>> -        xc->pending |= 1 << cppr;
>> -
>> -#ifdef XIVE_RUNTIME_CHECKS
>> -    /* Check consistency */
>> -    if (cppr >= xc->hw_cppr)
>> -        pr_warn("KVM-XIVE: CPU %d odd ack CPPR, got %d at %d\n",
>> -            smp_processor_id(), cppr, xc->hw_cppr);
>> -#endif
>> -
>> -    /*
>> -     * Update our image of the HW CPPR. We don't yet modify
>> -     * xc->cppr, this will be done as we scan for interrupts
>> -     * in the queues.
>> -     */
>> -    xc->hw_cppr = cppr;
>> -}
>> -
>> -static u8 GLUE(X_PFX,esb_load)(struct xive_irq_data *xd, u32 offset)
>> -{
>> -    u64 val;
>> -
>> -    if (offset == XIVE_ESB_SET_PQ_10 && xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
>> -        offset |= XIVE_ESB_LD_ST_MO;
>> -
>> -    val =__x_readq(__x_eoi_page(xd) + offset);
>> -#ifdef __LITTLE_ENDIAN__
>> -    val >>= 64-8;
>> -#endif
>> -    return (u8)val;
>> -}
>> -
>> -
>> -static void GLUE(X_PFX,source_eoi)(u32 hw_irq, struct xive_irq_data *xd)
>> -{
>> -    /* If the XIVE supports the new "store EOI facility, use it */
>> -    if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
>> -        __x_writeq(0, __x_eoi_page(xd) + XIVE_ESB_STORE_EOI);
>> -    else if (xd->flags & XIVE_IRQ_FLAG_LSI) {
>> -        /*
>> -         * For LSIs the HW EOI cycle is used rather than PQ bits,
>> -         * as they are automatically re-triggred in HW when still
>> -         * pending.
>> -         */
>> -        __x_readq(__x_eoi_page(xd) + XIVE_ESB_LOAD_EOI);
>> -    } else {
>> -        uint64_t eoi_val;
>> -
>> -        /*
>> -         * Otherwise for EOI, we use the special MMIO that does
>> -         * a clear of both P and Q and returns the old Q,
>> -         * except for LSIs where we use the "EOI cycle" special
>> -         * load.
>> -         *
>> -         * This allows us to then do a re-trigger if Q was set
>> -         * rather than synthetizing an interrupt in software
>> -         */
>> -        eoi_val = GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_00);
>> -
>> -        /* Re-trigger if needed */
>> -        if ((eoi_val & 1) && __x_trig_page(xd))
>> -            __x_writeq(0, __x_trig_page(xd));
>> -    }
>> -}
>> -
>> -enum {
>> -    scan_fetch,
>> -    scan_poll,
>> -    scan_eoi,
>> -};
>> -
>> -static u32 GLUE(X_PFX,scan_interrupts)(struct kvmppc_xive_vcpu *xc,
>> -                       u8 pending, int scan_type)
>> -{
>> -    u32 hirq = 0;
>> -    u8 prio = 0xff;
>> -
>> -    /* Find highest pending priority */
>> -    while ((xc->mfrr != 0xff || pending != 0) && hirq == 0) {
>> -        struct xive_q *q;
>> -        u32 idx, toggle;
>> -        __be32 *qpage;
>> -
>> -        /*
>> -         * If pending is 0 this will return 0xff which is what
>> -         * we want
>> -         */
>> -        prio = ffs(pending) - 1;
>> -
>> -        /* Don't scan past the guest cppr */
>> -        if (prio >= xc->cppr || prio > 7) {
>> -            if (xc->mfrr < xc->cppr) {
>> -                prio = xc->mfrr;
>> -                hirq = XICS_IPI;
>> -            }
>> -            break;
>> -        }
>> -
>> -        /* Grab queue and pointers */
>> -        q = &xc->queues[prio];
>> -        idx = q->idx;
>> -        toggle = q->toggle;
>> -
>> -        /*
>> -         * Snapshot the queue page. The test further down for EOI
>> -         * must use the same "copy" that was used by __xive_read_eq
>> -         * since qpage can be set concurrently and we don't want
>> -         * to miss an EOI.
>> -         */
>> -        qpage = READ_ONCE(q->qpage);
>> -
>> -skip_ipi:
>> -        /*
>> -         * Try to fetch from the queue. Will return 0 for a
>> -         * non-queueing priority (ie, qpage = 0).
>> -         */
>> -        hirq = __xive_read_eq(qpage, q->msk, &idx, &toggle);
>> -
>> -        /*
>> -         * If this was a signal for an MFFR change done by
>> -         * H_IPI we skip it. Additionally, if we were fetching
>> -         * we EOI it now, thus re-enabling reception of a new
>> -         * such signal.
>> -         *
>> -         * We also need to do that if prio is 0 and we had no
>> -         * page for the queue. In this case, we have non-queued
>> -         * IPI that needs to be EOId.
>> -         *
>> -         * This is safe because if we have another pending MFRR
>> -         * change that wasn't observed above, the Q bit will have
>> -         * been set and another occurrence of the IPI will trigger.
>> -         */
>> -        if (hirq == XICS_IPI || (prio == 0 && !qpage)) {
>> -            if (scan_type == scan_fetch) {
>> -                GLUE(X_PFX,source_eoi)(xc->vp_ipi,
>> -                               &xc->vp_ipi_data);
>> -                q->idx = idx;
>> -                q->toggle = toggle;
>> -            }
>> -            /* Loop back on same queue with updated idx/toggle */
>> -#ifdef XIVE_RUNTIME_CHECKS
>> -            WARN_ON(hirq && hirq != XICS_IPI);
>> -#endif
>> -            if (hirq)
>> -                goto skip_ipi;
>> -        }
>> -
>> -        /* If it's the dummy interrupt, continue searching */
>> -        if (hirq == XICS_DUMMY)
>> -            goto skip_ipi;
>> -
>> -        /* Clear the pending bit if the queue is now empty */
>> -        if (!hirq) {
>> -            pending &= ~(1 << prio);
>> -
>> -            /*
>> -             * Check if the queue count needs adjusting due to
>> -             * interrupts being moved away.
>> -             */
>> -            if (atomic_read(&q->pending_count)) {
>> -                int p = atomic_xchg(&q->pending_count, 0);
>> -                if (p) {
>> -#ifdef XIVE_RUNTIME_CHECKS
>> -                    WARN_ON(p > atomic_read(&q->count));
>> -#endif
>> -                    atomic_sub(p, &q->count);
>> -                }
>> -            }
>> -        }
>> -
>> -        /*
>> -         * If the most favoured prio we found pending is less
>> -         * favored (or equal) than a pending IPI, we return
>> -         * the IPI instead.
>> -         */
>> -        if (prio >= xc->mfrr && xc->mfrr < xc->cppr) {
>> -            prio = xc->mfrr;
>> -            hirq = XICS_IPI;
>> -            break;
>> -        }
>> -
>> -        /* If fetching, update queue pointers */
>> -        if (scan_type == scan_fetch) {
>> -            q->idx = idx;
>> -            q->toggle = toggle;
>> -        }
>> -    }
>> -
>> -    /* If we are just taking a "peek", do nothing else */
>> -    if (scan_type == scan_poll)
>> -        return hirq;
>> -
>> -    /* Update the pending bits */
>> -    xc->pending = pending;
>> -
>> -    /*
>> -     * If this is an EOI that's it, no CPPR adjustment done here,
>> -     * all we needed was cleanup the stale pending bits and check
>> -     * if there's anything left.
>> -     */
>> -    if (scan_type == scan_eoi)
>> -        return hirq;
>> -
>> -    /*
>> -     * If we found an interrupt, adjust what the guest CPPR should
>> -     * be as if we had just fetched that interrupt from HW.
>> -     *
>> -     * Note: This can only make xc->cppr smaller as the previous
>> -     * loop will only exit with hirq != 0 if prio is lower than
>> -     * the current xc->cppr. Thus we don't need to re-check xc->mfrr
>> -     * for pending IPIs.
>> -     */
>> -    if (hirq)
>> -        xc->cppr = prio;
>> -    /*
>> -     * If it was an IPI the HW CPPR might have been lowered too much
>> -     * as the HW interrupt we use for IPIs is routed to priority 0.
>> -     *
>> -     * We re-sync it here.
>> -     */
>> -    if (xc->cppr != xc->hw_cppr) {
>> -        xc->hw_cppr = xc->cppr;
>> -        __x_writeb(xc->cppr, __x_tima + TM_QW1_OS + TM_CPPR);
>> -    }
>> -
>> -    return hirq;
>> -}
>> -
>> -X_STATIC unsigned long GLUE(X_PFX,h_xirr)(struct kvm_vcpu *vcpu)
>> -{
>> -    struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
>> -    u8 old_cppr;
>> -    u32 hirq;
>> -
>> -    pr_devel("H_XIRR\n");
>> -
>> -    xc->GLUE(X_STAT_PFX,h_xirr)++;
>> -
>> -    /* First collect pending bits from HW */
>> -    GLUE(X_PFX,ack_pending)(xc);
>> -
>> -    pr_devel(" new pending=0x%02x hw_cppr=%d cppr=%d\n",
>> -         xc->pending, xc->hw_cppr, xc->cppr);
>> -
>> -    /* Grab previous CPPR and reverse map it */
>> -    old_cppr = xive_prio_to_guest(xc->cppr);
>> -
>> -    /* Scan for actual interrupts */
>> -    hirq = GLUE(X_PFX,scan_interrupts)(xc, xc->pending, scan_fetch);
>> -
>> -    pr_devel(" got hirq=0x%x hw_cppr=%d cppr=%d\n",
>> -         hirq, xc->hw_cppr, xc->cppr);
>> -
>> -#ifdef XIVE_RUNTIME_CHECKS
>> -    /* That should never hit */
>> -    if (hirq & 0xff000000)
>> -        pr_warn("XIVE: Weird guest interrupt number 0x%08x\n", hirq);
>> -#endif
>> -
>> -    /*
>> -     * XXX We could check if the interrupt is masked here and
>> -     * filter it. If we chose to do so, we would need to do:
>> -     *
>> -     *    if (masked) {
>> -     *        lock();
>> -     *        if (masked) {
>> -     *            old_Q = true;
>> -     *            hirq = 0;
>> -     *        }
>> -     *        unlock();
>> -     *    }
>> -     */
>> -
>> -    /* Return interrupt and old CPPR in GPR4 */
>> -    vcpu->arch.regs.gpr[4] = hirq | (old_cppr << 24);
>> -
>> -    return H_SUCCESS;
>> -}
>> -
>> -X_STATIC unsigned long GLUE(X_PFX,h_ipoll)(struct kvm_vcpu *vcpu, unsigned long server)
>> -{
>> -    struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
>> -    u8 pending = xc->pending;
>> -    u32 hirq;
>> -
>> -    pr_devel("H_IPOLL(server=%ld)\n", server);
>> -
>> -    xc->GLUE(X_STAT_PFX,h_ipoll)++;
>> -
>> -    /* Grab the target VCPU if not the current one */
>> -    if (xc->server_num != server) {
>> -        vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
>> -        if (!vcpu)
>> -            return H_PARAMETER;
>> -        xc = vcpu->arch.xive_vcpu;
>> -
>> -        /* Scan all priorities */
>> -        pending = 0xff;
>> -    } else {
>> -        /* Grab pending interrupt if any */
>> -        __be64 qw1 = __x_readq(__x_tima + TM_QW1_OS);
>> -        u8 pipr = be64_to_cpu(qw1) & 0xff;
>> -        if (pipr < 8)
>> -            pending |= 1 << pipr;
>> -    }
>> -
>> -    hirq = GLUE(X_PFX,scan_interrupts)(xc, pending, scan_poll);
>> -
>> -    /* Return interrupt and old CPPR in GPR4 */
>> -    vcpu->arch.regs.gpr[4] = hirq | (xc->cppr << 24);
>> -
>> -    return H_SUCCESS;
>> -}
>> -
>> -static void GLUE(X_PFX,push_pending_to_hw)(struct kvmppc_xive_vcpu *xc)
>> -{
>> -    u8 pending, prio;
>> -
>> -    pending = xc->pending;
>> -    if (xc->mfrr != 0xff) {
>> -        if (xc->mfrr < 8)
>> -            pending |= 1 << xc->mfrr;
>> -        else
>> -            pending |= 0x80;
>> -    }
>> -    if (!pending)
>> -        return;
>> -    prio = ffs(pending) - 1;
>> -
>> -    __x_writeb(prio, __x_tima + TM_SPC_SET_OS_PENDING);
>> -}
>> -
>> -static void GLUE(X_PFX,scan_for_rerouted_irqs)(struct kvmppc_xive *xive,
>> -                           struct kvmppc_xive_vcpu *xc)
>> -{
>> -    unsigned int prio;
>> -
>> -    /* For each priority that is now masked */
>> -    for (prio = xc->cppr; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
>> -        struct xive_q *q = &xc->queues[prio];
>> -        struct kvmppc_xive_irq_state *state;
>> -        struct kvmppc_xive_src_block *sb;
>> -        u32 idx, toggle, entry, irq, hw_num;
>> -        struct xive_irq_data *xd;
>> -        __be32 *qpage;
>> -        u16 src;
>> -
>> -        idx = q->idx;
>> -        toggle = q->toggle;
>> -        qpage = READ_ONCE(q->qpage);
>> -        if (!qpage)
>> -            continue;
>> -
>> -        /* For each interrupt in the queue */
>> -        for (;;) {
>> -            entry = be32_to_cpup(qpage + idx);
>> -
>> -            /* No more ? */
>> -            if ((entry >> 31) == toggle)
>> -                break;
>> -            irq = entry & 0x7fffffff;
>> -
>> -            /* Skip dummies and IPIs */
>> -            if (irq == XICS_DUMMY || irq == XICS_IPI)
>> -                goto next;
>> -            sb = kvmppc_xive_find_source(xive, irq, &src);
>> -            if (!sb)
>> -                goto next;
>> -            state = &sb->irq_state[src];
>> -
>> -            /* Has it been rerouted ? */
>> -            if (xc->server_num == state->act_server)
>> -                goto next;
>> -
>> -            /*
>> -             * Allright, it *has* been re-routed, kill it from
>> -             * the queue.
>> -             */
>> -            qpage[idx] = cpu_to_be32((entry & 0x80000000) | XICS_DUMMY);
>> -
>> -            /* Find the HW interrupt */
>> -            kvmppc_xive_select_irq(state, &hw_num, &xd);
>> -
>> -            /* If it's not an LSI, set PQ to 11 the EOI will force a resend */
>> -            if (!(xd->flags & XIVE_IRQ_FLAG_LSI))
>> -                GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_11);
>> -
>> -            /* EOI the source */
>> -            GLUE(X_PFX,source_eoi)(hw_num, xd);
>> -
>> -        next:
>> -            idx = (idx + 1) & q->msk;
>> -            if (idx == 0)
>> -                toggle ^= 1;
>> -        }
>> -    }
>> -}
>> -
>> -X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
>> -{
>> -    struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
>> -    struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
>> -    u8 old_cppr;
>> -
>> -    pr_devel("H_CPPR(cppr=%ld)\n", cppr);
>> -
>> -    xc->GLUE(X_STAT_PFX,h_cppr)++;
>> -
>> -    /* Map CPPR */
>> -    cppr = xive_prio_from_guest(cppr);
>> -
>> -    /* Remember old and update SW state */
>> -    old_cppr = xc->cppr;
>> -    xc->cppr = cppr;
>> -
>> -    /*
>> -     * Order the above update of xc->cppr with the subsequent
>> -     * read of xc->mfrr inside push_pending_to_hw()
>> -     */
>> -    smp_mb();
>> -
>> -    if (cppr > old_cppr) {
>> -        /*
>> -         * We are masking less, we need to look for pending things
>> -         * to deliver and set VP pending bits accordingly to trigger
>> -         * a new interrupt otherwise we might miss MFRR changes for
>> -         * which we have optimized out sending an IPI signal.
>> -         */
>> -        GLUE(X_PFX,push_pending_to_hw)(xc);
>> -    } else {
>> -        /*
>> -         * We are masking more, we need to check the queue for any
>> -         * interrupt that has been routed to another CPU, take
>> -         * it out (replace it with the dummy) and retrigger it.
>> -         *
>> -         * This is necessary since those interrupts may otherwise
>> -         * never be processed, at least not until this CPU restores
>> -         * its CPPR.
>> -         *
>> -         * This is in theory racy vs. HW adding new interrupts to
>> -         * the queue. In practice this works because the interesting
>> -         * cases are when the guest has done a set_xive() to move the
>> -         * interrupt away, which flushes the xive, followed by the
>> -         * target CPU doing a H_CPPR. So any new interrupt coming into
>> -         * the queue must still be routed to us and isn't a source
>> -         * of concern.
>> -         */
>> -        GLUE(X_PFX,scan_for_rerouted_irqs)(xive, xc);
>> -    }
>> -
>> -    /* Apply new CPPR */
>> -    xc->hw_cppr = cppr;
>> -    __x_writeb(cppr, __x_tima + TM_QW1_OS + TM_CPPR);
>> -
>> -    return H_SUCCESS;
>> -}
>> -
>> -X_STATIC int GLUE(X_PFX,h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr)
>> -{
>> -    struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
>> -    struct kvmppc_xive_src_block *sb;
>> -    struct kvmppc_xive_irq_state *state;
>> -    struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
>> -    struct xive_irq_data *xd;
>> -    u8 new_cppr = xirr >> 24;
>> -    u32 irq = xirr & 0x00ffffff, hw_num;
>> -    u16 src;
>> -    int rc = 0;
>> -
>> -    pr_devel("H_EOI(xirr=%08lx)\n", xirr);
>> -
>> -    xc->GLUE(X_STAT_PFX,h_eoi)++;
>> -
>> -    xc->cppr = xive_prio_from_guest(new_cppr);
>> -
>> -    /*
>> -     * IPIs are synthetized from MFRR and thus don't need
>> -     * any special EOI handling. The underlying interrupt
>> -     * used to signal MFRR changes is EOId when fetched from
>> -     * the queue.
>> -     */
>> -    if (irq == XICS_IPI || irq == 0) {
>> -        /*
>> -         * This barrier orders the setting of xc->cppr vs.
>> -         * subsquent test of xc->mfrr done inside
>> -         * scan_interrupts and push_pending_to_hw
>> -         */
>> -        smp_mb();
>> -        goto bail;
>> -    }
>> -
>> -    /* Find interrupt source */
>> -    sb = kvmppc_xive_find_source(xive, irq, &src);
>> -    if (!sb) {
>> -        pr_devel(" source not found !\n");
>> -        rc = H_PARAMETER;
>> -        /* Same as above */
>> -        smp_mb();
>> -        goto bail;
>> -    }
>> -    state = &sb->irq_state[src];
>> -    kvmppc_xive_select_irq(state, &hw_num, &xd);
>> -
>> -    state->in_eoi = true;
>> -
>> -    /*
>> -     * This barrier orders both setting of in_eoi above vs,
>> -     * subsequent test of guest_priority, and the setting
>> -     * of xc->cppr vs. subsquent test of xc->mfrr done inside
>> -     * scan_interrupts and push_pending_to_hw
>> -     */
>> -    smp_mb();
>> -
>> -again:
>> -    if (state->guest_priority == MASKED) {
>> -        arch_spin_lock(&sb->lock);
>> -        if (state->guest_priority != MASKED) {
>> -            arch_spin_unlock(&sb->lock);
>> -            goto again;
>> -        }
>> -        pr_devel(" EOI on saved P...\n");
>> -
>> -        /* Clear old_p, that will cause unmask to perform an EOI */
>> -        state->old_p = false;
>> -
>> -        arch_spin_unlock(&sb->lock);
>> -    } else {
>> -        pr_devel(" EOI on source...\n");
>> -
>> -        /* Perform EOI on the source */
>> -        GLUE(X_PFX,source_eoi)(hw_num, xd);
>> -
>> -        /* If it's an emulated LSI, check level and resend */
>> -        if (state->lsi && state->asserted)
>> -            __x_writeq(0, __x_trig_page(xd));
>> -
>> -    }
>> -
>> -    /*
>> -     * This barrier orders the above guest_priority check
>> -     * and spin_lock/unlock with clearing in_eoi below.
>> -     *
>> -     * It also has to be a full mb() as it must ensure
>> -     * the MMIOs done in source_eoi() are completed before
>> -     * state->in_eoi is visible.
>> -     */
>> -    mb();
>> -    state->in_eoi = false;
>> -bail:
>> -
>> -    /* Re-evaluate pending IRQs and update HW */
>> -    GLUE(X_PFX,scan_interrupts)(xc, xc->pending, scan_eoi);
>> -    GLUE(X_PFX,push_pending_to_hw)(xc);
>> -    pr_devel(" after scan pending=%02x\n", xc->pending);
>> -
>> -    /* Apply new CPPR */
>> -    xc->hw_cppr = xc->cppr;
>> -    __x_writeb(xc->cppr, __x_tima + TM_QW1_OS + TM_CPPR);
>> -
>> -    return rc;
>> -}
>> -
>> -X_STATIC int GLUE(X_PFX,h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
>> -                   unsigned long mfrr)
>> -{
>> -    struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
>> -
>> -    pr_devel("H_IPI(server=%08lx,mfrr=%ld)\n", server, mfrr);
>> -
>> -    xc->GLUE(X_STAT_PFX,h_ipi)++;
>> -
>> -    /* Find target */
>> -    vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
>> -    if (!vcpu)
>> -        return H_PARAMETER;
>> -    xc = vcpu->arch.xive_vcpu;
>> -
>> -    /* Locklessly write over MFRR */
>> -    xc->mfrr = mfrr;
>> -
>> -    /*
>> -     * The load of xc->cppr below and the subsequent MMIO store
>> -     * to the IPI must happen after the above mfrr update is
>> -     * globally visible so that:
>> -     *
>> -     * - Synchronize with another CPU doing an H_EOI or a H_CPPR
>> -     *   updating xc->cppr then reading xc->mfrr.
>> -     *
>> -     * - The target of the IPI sees the xc->mfrr update
>> -     */
>> -    mb();
>> -
>> -    /* Shoot the IPI if most favored than target cppr */
>> -    if (mfrr < xc->cppr)
>> -        __x_writeq(0, __x_trig_page(&xc->vp_ipi_data));
>> -
>> -    return H_SUCCESS;
>> -}
>> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
>> index 44d74bfe05df..5003563ca38f 100644
>> --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
>> +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
>> @@ -1803,11 +1803,11 @@ hcall_real_table:
>>       .long    0        /* 0x5c */
>>       .long    0        /* 0x60 */
>>   #ifdef CONFIG_KVM_XICS
>> -    .long    DOTSYM(kvmppc_rm_h_eoi) - hcall_real_table
>> -    .long    DOTSYM(kvmppc_rm_h_cppr) - hcall_real_table
>> -    .long    DOTSYM(kvmppc_rm_h_ipi) - hcall_real_table
>> -    .long    DOTSYM(kvmppc_rm_h_ipoll) - hcall_real_table
>> -    .long    DOTSYM(kvmppc_rm_h_xirr) - hcall_real_table
>> +    .long    DOTSYM(xics_rm_h_eoi) - hcall_real_table
>> +    .long    DOTSYM(xics_rm_h_cppr) - hcall_real_table
>> +    .long    DOTSYM(xics_rm_h_ipi) - hcall_real_table
>> +    .long    0        /* 0x70 - H_IPOLL */
>> +    .long    DOTSYM(xics_rm_h_xirr) - hcall_real_table
>>   #else
>>       .long    0        /* 0x64 - H_EOI */
>>       .long    0        /* 0x68 - H_CPPR */
>> @@ -1977,7 +1977,7 @@ hcall_real_table:
>>       .long    0        /* 0x2f4 */
>>       .long    0        /* 0x2f8 */
>>   #ifdef CONFIG_KVM_XICS
>> -    .long    DOTSYM(kvmppc_rm_h_xirr_x) - hcall_real_table
>> +    .long    DOTSYM(xics_rm_h_xirr_x) - hcall_real_table
>>   #else
>>       .long    0        /* 0x2fc - H_XIRR_X*/
>>   #endif
>
kernel test robot May 12, 2022, 5:59 p.m. UTC | #3
Hi Alexey,

Thank you for the patch! There is still something to improve:

[auto build test ERROR on powerpc/topic/ppc-kvm]
[also build test ERROR on v5.18-rc6 next-20220512]
[If your patch is applied to the wrong git tree, kindly drop us a note.
When submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/intel-lab-lkp/linux/commits/Alexey-Kardashevskiy/KVM-PPC-Book3s-Remove-real-mode-interrupt-controller-hcalls-handlers/20220509-151356
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git topic/ppc-kvm
config: powerpc64-defconfig (https://download.01.org/0day-ci/archive/20220513/202205130131.pJEWLeCR-lkp@intel.com/config)
compiler: powerpc64-linux-gcc (GCC) 11.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/5689429d96c7921451769fa24e3d037147c7da11
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Alexey-Kardashevskiy/KVM-PPC-Book3s-Remove-real-mode-interrupt-controller-hcalls-handlers/20220509-151356
        git checkout 5689429d96c7921451769fa24e3d037147c7da11
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.3.0 make.cross W=1 O=build_dir ARCH=powerpc SHELL=/bin/bash arch/powerpc/kvm/

If you fix the issue, kindly add the following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

>> arch/powerpc/kvm/book3s_hv_rm_xics.c:482:15: error: no previous prototype for 'xics_rm_h_xirr_x' [-Werror=missing-prototypes]
     482 | unsigned long xics_rm_h_xirr_x(struct kvm_vcpu *vcpu)
         |               ^~~~~~~~~~~~~~~~
   cc1: all warnings being treated as errors


vim +/xics_rm_h_xirr_x +482 arch/powerpc/kvm/book3s_hv_rm_xics.c

   481	
 > 482	unsigned long xics_rm_h_xirr_x(struct kvm_vcpu *vcpu)
   483	{
   484		vcpu->arch.regs.gpr[5] = get_tb();
   485		return xics_rm_h_xirr(vcpu);
   486	}
   487
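
A note on the report above: the W=1 failure is only about a missing declaration for the new wrapper, not about its behaviour. One minimal way to silence it (a sketch only -- this is not part of the posted patch, and the header placement shown here is an assumption) is to declare xics_rm_h_xirr_x() next to the existing xics_rm_* prototypes that book3s_hv_rm_xics.c already includes:

	/* Sketch: placement is assumed, not taken from the patch */
	struct kvm_vcpu;					/* forward declaration so the header stands alone */
	unsigned long xics_rm_h_xirr(struct kvm_vcpu *vcpu);	/* existing real-mode handler */
	unsigned long xics_rm_h_xirr_x(struct kvm_vcpu *vcpu);	/* new: provides a previous prototype for the wrapper */

With such a declaration visible to book3s_hv_rm_xics.c, -Wmissing-prototypes should no longer trigger for xics_rm_h_xirr_x(), which is what the -Werror build in the report trips over.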
diff mbox series

Patch

diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 8e3681a86074..f17379b0f161 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -73,7 +73,7 @@  kvm-hv-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
 	book3s_hv_tm.o
 
 kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
-	book3s_hv_rm_xics.o book3s_hv_rm_xive.o
+	book3s_hv_rm_xics.o
 
 kvm-book3s_64-builtin-tm-objs-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
 	book3s_hv_tm_builtin.o
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 44200a27371b..a775377a570e 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -787,13 +787,6 @@  long kvmppc_rm_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags,
 			   unsigned long dest, unsigned long src);
 long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
                           unsigned long slb_v, unsigned int status, bool data);
-unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu);
-unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu);
-unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server);
-int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
-                    unsigned long mfrr);
-int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
-int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
 void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu);
 
 /*
diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
index 09d0657596c3..1e48f72e8aa5 100644
--- a/arch/powerpc/kvm/book3s_xive.h
+++ b/arch/powerpc/kvm/book3s_xive.h
@@ -285,13 +285,6 @@  static inline u32 __xive_read_eq(__be32 *qpage, u32 msk, u32 *idx, u32 *toggle)
 	return cur & 0x7fffffff;
 }
 
-extern unsigned long xive_rm_h_xirr(struct kvm_vcpu *vcpu);
-extern unsigned long xive_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server);
-extern int xive_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
-			 unsigned long mfrr);
-extern int xive_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
-extern int xive_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
-
 /*
  * Common Xive routines for XICS-over-XIVE and XIVE native
  */
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 7e52d0beee77..88a8f6473c4e 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -489,70 +489,6 @@  static long kvmppc_read_one_intr(bool *again)
 	return kvmppc_check_passthru(xisr, xirr, again);
 }
 
-#ifdef CONFIG_KVM_XICS
-unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
-{
-	if (!kvmppc_xics_enabled(vcpu))
-		return H_TOO_HARD;
-	if (xics_on_xive())
-		return xive_rm_h_xirr(vcpu);
-	else
-		return xics_rm_h_xirr(vcpu);
-}
-
-unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu)
-{
-	if (!kvmppc_xics_enabled(vcpu))
-		return H_TOO_HARD;
-	vcpu->arch.regs.gpr[5] = get_tb();
-	if (xics_on_xive())
-		return xive_rm_h_xirr(vcpu);
-	else
-		return xics_rm_h_xirr(vcpu);
-}
-
-unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server)
-{
-	if (!kvmppc_xics_enabled(vcpu))
-		return H_TOO_HARD;
-	if (xics_on_xive())
-		return xive_rm_h_ipoll(vcpu, server);
-	else
-		return H_TOO_HARD;
-}
-
-int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
-		    unsigned long mfrr)
-{
-	if (!kvmppc_xics_enabled(vcpu))
-		return H_TOO_HARD;
-	if (xics_on_xive())
-		return xive_rm_h_ipi(vcpu, server, mfrr);
-	else
-		return xics_rm_h_ipi(vcpu, server, mfrr);
-}
-
-int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
-{
-	if (!kvmppc_xics_enabled(vcpu))
-		return H_TOO_HARD;
-	if (xics_on_xive())
-		return xive_rm_h_cppr(vcpu, cppr);
-	else
-		return xics_rm_h_cppr(vcpu, cppr);
-}
-
-int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
-{
-	if (!kvmppc_xics_enabled(vcpu))
-		return H_TOO_HARD;
-	if (xics_on_xive())
-		return xive_rm_h_eoi(vcpu, xirr);
-	else
-		return xics_rm_h_eoi(vcpu, xirr);
-}
-#endif /* CONFIG_KVM_XICS */
-
 void kvmppc_bad_interrupt(struct pt_regs *regs)
 {
 	/*
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 587c33fc4564..e2246b715f68 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -479,6 +479,11 @@  static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
 	}
 }
 
+unsigned long xics_rm_h_xirr_x(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.regs.gpr[5] = get_tb();
+	return xics_rm_h_xirr(vcpu);
+}
 
 unsigned long xics_rm_h_xirr(struct kvm_vcpu *vcpu)
 {
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xive.c b/arch/powerpc/kvm/book3s_hv_rm_xive.c
deleted file mode 100644
index dd9880731bd6..000000000000
--- a/arch/powerpc/kvm/book3s_hv_rm_xive.c
+++ /dev/null
@@ -1,46 +0,0 @@ 
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/kernel.h>
-#include <linux/kvm_host.h>
-#include <linux/err.h>
-#include <linux/kernel_stat.h>
-#include <linux/pgtable.h>
-
-#include <asm/kvm_book3s.h>
-#include <asm/kvm_ppc.h>
-#include <asm/hvcall.h>
-#include <asm/xics.h>
-#include <asm/debug.h>
-#include <asm/synch.h>
-#include <asm/cputhreads.h>
-#include <asm/ppc-opcode.h>
-#include <asm/pnv-pci.h>
-#include <asm/opal.h>
-#include <asm/smp.h>
-#include <asm/xive.h>
-#include <asm/xive-regs.h>
-
-#include "book3s_xive.h"
-
-/* XXX */
-#include <asm/udbg.h>
-//#define DBG(fmt...) udbg_printf(fmt)
-#define DBG(fmt...) do { } while(0)
-
-static inline void __iomem *get_tima_phys(void)
-{
-	return local_paca->kvm_hstate.xive_tima_phys;
-}
-
-#undef XIVE_RUNTIME_CHECKS
-#define X_PFX xive_rm_
-#define X_STATIC
-#define X_STAT_PFX stat_rm_
-#define __x_tima		get_tima_phys()
-#define __x_eoi_page(xd)	((void __iomem *)((xd)->eoi_page))
-#define __x_trig_page(xd)	((void __iomem *)((xd)->trig_page))
-#define __x_writeb	__raw_rm_writeb
-#define __x_readw	__raw_rm_readw
-#define __x_readq	__raw_rm_readq
-#define __x_writeq	__raw_rm_writeq
-
-#include "book3s_xive_template.c"
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index c0ce5531d9bc..65515a96498a 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -30,27 +30,629 @@ 
 
 #include "book3s_xive.h"
 
-
-/*
- * Virtual mode variants of the hcalls for use on radix/radix
- * with AIL. They require the VCPU's VP to be "pushed"
- *
- * We still instantiate them here because we use some of the
- * generated utility functions as well in this file.
- */
-#define XIVE_RUNTIME_CHECKS
-#define X_PFX xive_vm_
-#define X_STATIC static
-#define X_STAT_PFX stat_vm_
-#define __x_tima		xive_tima
 #define __x_eoi_page(xd)	((void __iomem *)((xd)->eoi_mmio))
 #define __x_trig_page(xd)	((void __iomem *)((xd)->trig_mmio))
-#define __x_writeb	__raw_writeb
-#define __x_readw	__raw_readw
-#define __x_readq	__raw_readq
-#define __x_writeq	__raw_writeq
 
-#include "book3s_xive_template.c"
+/* Dummy interrupt used when taking interrupts out of a queue in H_CPPR */
+#define XICS_DUMMY	1
+
+static void xive_vm_ack_pending(struct kvmppc_xive_vcpu *xc)
+{
+	u8 cppr;
+	u16 ack;
+
+	/*
+	 * Ensure any previous store to CPPR is ordered vs.
+	 * the subsequent loads from PIPR or ACK.
+	 */
+	eieio();
+
+	/* Perform the acknowledge OS to register cycle. */
+	ack = be16_to_cpu(__raw_readw(xive_tima + TM_SPC_ACK_OS_REG));
+
+	/* Synchronize subsequent queue accesses */
+	mb();
+
+	/* XXX Check grouping level */
+
+	/* Anything ? */
+	if (!((ack >> 8) & TM_QW1_NSR_EO))
+		return;
+
+	/* Grab CPPR of the most favored pending interrupt */
+	cppr = ack & 0xff;
+	if (cppr < 8)
+		xc->pending |= 1 << cppr;
+
+	/* Check consistency */
+	if (cppr >= xc->hw_cppr)
+		pr_warn("KVM-XIVE: CPU %d odd ack CPPR, got %d at %d\n",
+			smp_processor_id(), cppr, xc->hw_cppr);
+
+	/*
+	 * Update our image of the HW CPPR. We don't yet modify
+	 * xc->cppr, this will be done as we scan for interrupts
+	 * in the queues.
+	 */
+	xc->hw_cppr = cppr;
+}
+
+static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset)
+{
+	u64 val;
+
+	if (offset == XIVE_ESB_SET_PQ_10 && xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
+		offset |= XIVE_ESB_LD_ST_MO;
+
+	val = __raw_readq(__x_eoi_page(xd) + offset);
+#ifdef __LITTLE_ENDIAN__
+	val >>= 64-8;
+#endif
+	return (u8)val;
+}
+
+
+static void xive_vm_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
+{
+	/* If the XIVE supports the new "store EOI facility, use it */
+	if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
+		__raw_writeq(0, __x_eoi_page(xd) + XIVE_ESB_STORE_EOI);
+	else if (xd->flags & XIVE_IRQ_FLAG_LSI) {
+		/*
+		 * For LSIs the HW EOI cycle is used rather than PQ bits,
+		 * as they are automatically re-triggred in HW when still
+		 * pending.
+		 */
+		__raw_readq(__x_eoi_page(xd) + XIVE_ESB_LOAD_EOI);
+	} else {
+		uint64_t eoi_val;
+
+		/*
+		 * Otherwise for EOI, we use the special MMIO that does
+		 * a clear of both P and Q and returns the old Q,
+		 * except for LSIs where we use the "EOI cycle" special
+		 * load.
+		 *
+		 * This allows us to then do a re-trigger if Q was set
+		 * rather than synthetizing an interrupt in software
+		 */
+		eoi_val = xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_00);
+
+		/* Re-trigger if needed */
+		if ((eoi_val & 1) && __x_trig_page(xd))
+			__raw_writeq(0, __x_trig_page(xd));
+	}
+}
+
+enum {
+	scan_fetch,
+	scan_poll,
+	scan_eoi,
+};
+
+static u32 xive_vm_scan_interrupts(struct kvmppc_xive_vcpu *xc,
+				       u8 pending, int scan_type)
+{
+	u32 hirq = 0;
+	u8 prio = 0xff;
+
+	/* Find highest pending priority */
+	while ((xc->mfrr != 0xff || pending != 0) && hirq == 0) {
+		struct xive_q *q;
+		u32 idx, toggle;
+		__be32 *qpage;
+
+		/*
+		 * If pending is 0 this will return 0xff which is what
+		 * we want
+		 */
+		prio = ffs(pending) - 1;
+
+		/* Don't scan past the guest cppr */
+		if (prio >= xc->cppr || prio > 7) {
+			if (xc->mfrr < xc->cppr) {
+				prio = xc->mfrr;
+				hirq = XICS_IPI;
+			}
+			break;
+		}
+
+		/* Grab queue and pointers */
+		q = &xc->queues[prio];
+		idx = q->idx;
+		toggle = q->toggle;
+
+		/*
+		 * Snapshot the queue page. The test further down for EOI
+		 * must use the same "copy" that was used by __xive_read_eq
+		 * since qpage can be set concurrently and we don't want
+		 * to miss an EOI.
+		 */
+		qpage = READ_ONCE(q->qpage);
+
+skip_ipi:
+		/*
+		 * Try to fetch from the queue. Will return 0 for a
+		 * non-queueing priority (ie, qpage = 0).
+		 */
+		hirq = __xive_read_eq(qpage, q->msk, &idx, &toggle);
+
+		/*
+		 * If this was a signal for an MFFR change done by
+		 * H_IPI we skip it. Additionally, if we were fetching
+		 * we EOI it now, thus re-enabling reception of a new
+		 * such signal.
+		 *
+		 * We also need to do that if prio is 0 and we had no
+		 * page for the queue. In this case, we have a non-queued
+		 * IPI that needs to be EOId.
+		 *
+		 * This is safe because if we have another pending MFRR
+		 * change that wasn't observed above, the Q bit will have
+		 * been set and another occurrence of the IPI will trigger.
+		 */
+		if (hirq == XICS_IPI || (prio == 0 && !qpage)) {
+			if (scan_type == scan_fetch) {
+				xive_vm_source_eoi(xc->vp_ipi,
+						       &xc->vp_ipi_data);
+				q->idx = idx;
+				q->toggle = toggle;
+			}
+			/* Loop back on same queue with updated idx/toggle */
+			WARN_ON(hirq && hirq != XICS_IPI);
+			if (hirq)
+				goto skip_ipi;
+		}
+
+		/* If it's the dummy interrupt, continue searching */
+		if (hirq == XICS_DUMMY)
+			goto skip_ipi;
+
+		/* Clear the pending bit if the queue is now empty */
+		if (!hirq) {
+			pending &= ~(1 << prio);
+
+			/*
+			 * Check if the queue count needs adjusting due to
+			 * interrupts being moved away.
+			 */
+			if (atomic_read(&q->pending_count)) {
+				int p = atomic_xchg(&q->pending_count, 0);
+
+				if (p) {
+					WARN_ON(p > atomic_read(&q->count));
+					atomic_sub(p, &q->count);
+				}
+			}
+		}
+
+		/*
+		 * If the most favored prio we found pending is equally or
+		 * less favored than a pending IPI, we return the IPI
+		 * instead.
+		 */
+		if (prio >= xc->mfrr && xc->mfrr < xc->cppr) {
+			prio = xc->mfrr;
+			hirq = XICS_IPI;
+			break;
+		}
+
+		/* If fetching, update queue pointers */
+		if (scan_type == scan_fetch) {
+			q->idx = idx;
+			q->toggle = toggle;
+		}
+	}
+
+	/* If we are just taking a "peek", do nothing else */
+	if (scan_type == scan_poll)
+		return hirq;
+
+	/* Update the pending bits */
+	xc->pending = pending;
+
+	/*
+	 * If this is an EOI, that's it: no CPPR adjustment is done here,
+	 * all we needed was to clean up the stale pending bits and check
+	 * if there's anything left.
+	 */
+	if (scan_type == scan_eoi)
+		return hirq;
+
+	/*
+	 * If we found an interrupt, adjust what the guest CPPR should
+	 * be as if we had just fetched that interrupt from HW.
+	 *
+	 * Note: This can only make xc->cppr smaller as the previous
+	 * loop will only exit with hirq != 0 if prio is lower than
+	 * the current xc->cppr. Thus we don't need to re-check xc->mfrr
+	 * for pending IPIs.
+	 */
+	if (hirq)
+		xc->cppr = prio;
+	/*
+	 * If it was an IPI the HW CPPR might have been lowered too much
+	 * as the HW interrupt we use for IPIs is routed to priority 0.
+	 *
+	 * We re-sync it here.
+	 */
+	if (xc->cppr != xc->hw_cppr) {
+		xc->hw_cppr = xc->cppr;
+		__raw_writeb(xc->cppr, xive_tima + TM_QW1_OS + TM_CPPR);
+	}
+
+	return hirq;
+}
+
+static unsigned long xive_vm_h_xirr(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	u8 old_cppr;
+	u32 hirq;
+
+	pr_devel("H_XIRR\n");
+
+	xc->stat_vm_h_xirr++;
+
+	/* First collect pending bits from HW */
+	xive_vm_ack_pending(xc);
+
+	pr_devel(" new pending=0x%02x hw_cppr=%d cppr=%d\n",
+		 xc->pending, xc->hw_cppr, xc->cppr);
+
+	/* Grab previous CPPR and reverse map it */
+	old_cppr = xive_prio_to_guest(xc->cppr);
+
+	/* Scan for actual interrupts */
+	hirq = xive_vm_scan_interrupts(xc, xc->pending, scan_fetch);
+
+	pr_devel(" got hirq=0x%x hw_cppr=%d cppr=%d\n",
+		 hirq, xc->hw_cppr, xc->cppr);
+
+	/* That should never hit */
+	if (hirq & 0xff000000)
+		pr_warn("XIVE: Weird guest interrupt number 0x%08x\n", hirq);
+
+	/*
+	 * XXX We could check if the interrupt is masked here and
+	 * filter it. If we chose to do so, we would need to do:
+	 *
+	 *    if (masked) {
+	 *        lock();
+	 *        if (masked) {
+	 *            old_Q = true;
+	 *            hirq = 0;
+	 *        }
+	 *        unlock();
+	 *    }
+	 */
+
+	/* Return interrupt and old CPPR in GPR4 */
+	vcpu->arch.regs.gpr[4] = hirq | (old_cppr << 24);
+
+	return H_SUCCESS;
+}
+
+static unsigned long xive_vm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	u8 pending = xc->pending;
+	u32 hirq;
+
+	pr_devel("H_IPOLL(server=%ld)\n", server);
+
+	xc->stat_vm_h_ipoll++;
+
+	/* Grab the target VCPU if not the current one */
+	if (xc->server_num != server) {
+		vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
+		if (!vcpu)
+			return H_PARAMETER;
+		xc = vcpu->arch.xive_vcpu;
+
+		/* Scan all priorities */
+		pending = 0xff;
+	} else {
+		/* Grab pending interrupt if any */
+		__be64 qw1 = __raw_readq(xive_tima + TM_QW1_OS);
+		u8 pipr = be64_to_cpu(qw1) & 0xff;
+
+		if (pipr < 8)
+			pending |= 1 << pipr;
+	}
+
+	hirq = xive_vm_scan_interrupts(xc, pending, scan_poll);
+
+	/* Return interrupt and old CPPR in GPR4 */
+	vcpu->arch.regs.gpr[4] = hirq | (xc->cppr << 24);
+
+	return H_SUCCESS;
+}
+
+static void xive_vm_push_pending_to_hw(struct kvmppc_xive_vcpu *xc)
+{
+	u8 pending, prio;
+
+	pending = xc->pending;
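+	/* Fold a valid MFRR in; values above 7 map to the least favored bit */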
+	if (xc->mfrr != 0xff) {
+		if (xc->mfrr < 8)
+			pending |= 1 << xc->mfrr;
+		else
+			pending |= 0x80;
+	}
+	if (!pending)
+		return;
+	prio = ffs(pending) - 1;
+
+	__raw_writeb(prio, xive_tima + TM_SPC_SET_OS_PENDING);
+}
+
+static void xive_vm_scan_for_rerouted_irqs(struct kvmppc_xive *xive,
+					       struct kvmppc_xive_vcpu *xc)
+{
+	unsigned int prio;
+
+	/* For each priority that is now masked */
+	for (prio = xc->cppr; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
+		struct xive_q *q = &xc->queues[prio];
+		struct kvmppc_xive_irq_state *state;
+		struct kvmppc_xive_src_block *sb;
+		u32 idx, toggle, entry, irq, hw_num;
+		struct xive_irq_data *xd;
+		__be32 *qpage;
+		u16 src;
+
+		idx = q->idx;
+		toggle = q->toggle;
+		qpage = READ_ONCE(q->qpage);
+		if (!qpage)
+			continue;
+
+		/* For each interrupt in the queue */
+		for (;;) {
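+			/*
+			 * The top bit of each queue entry is the toggle
+			 * (generation) bit, the rest is the interrupt number.
+			 */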
+			entry = be32_to_cpup(qpage + idx);
+
+			/* No more ? */
+			if ((entry >> 31) == toggle)
+				break;
+			irq = entry & 0x7fffffff;
+
+			/* Skip dummies and IPIs */
+			if (irq == XICS_DUMMY || irq == XICS_IPI)
+				goto next;
+			sb = kvmppc_xive_find_source(xive, irq, &src);
+			if (!sb)
+				goto next;
+			state = &sb->irq_state[src];
+
+			/* Has it been rerouted ? */
+			if (xc->server_num == state->act_server)
+				goto next;
+
+			/*
+			 * All right, it *has* been re-routed, kill it from
+			 * the queue.
+			 */
+			qpage[idx] = cpu_to_be32((entry & 0x80000000) | XICS_DUMMY);
+
+			/* Find the HW interrupt */
+			kvmppc_xive_select_irq(state, &hw_num, &xd);
+
+			/* If it's not an LSI, set PQ to 11 so the EOI will force a resend */
+			if (!(xd->flags & XIVE_IRQ_FLAG_LSI))
+				xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_11);
+
+			/* EOI the source */
+			xive_vm_source_eoi(hw_num, xd);
+
+next:
+			idx = (idx + 1) & q->msk;
+			if (idx == 0)
+				toggle ^= 1;
+		}
+	}
+}
+
+static int xive_vm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
+	u8 old_cppr;
+
+	pr_devel("H_CPPR(cppr=%ld)\n", cppr);
+
+	xc->stat_vm_h_cppr++;
+
+	/* Map CPPR */
+	cppr = xive_prio_from_guest(cppr);
+
+	/* Remember old and update SW state */
+	old_cppr = xc->cppr;
+	xc->cppr = cppr;
+
+	/*
+	 * Order the above update of xc->cppr with the subsequent
+	 * read of xc->mfrr inside push_pending_to_hw()
+	 */
+	smp_mb();
+
+	if (cppr > old_cppr) {
+		/*
+		 * We are masking less, so we need to look for pending things
+		 * to deliver and set VP pending bits accordingly to trigger
+		 * a new interrupt; otherwise we might miss MFRR changes for
+		 * which we have optimized out sending an IPI signal.
+		 */
+		xive_vm_push_pending_to_hw(xc);
+	} else {
+		/*
+		 * We are masking more, so we need to check the queue for any
+		 * interrupt that has been routed to another CPU, take
+		 * it out (replace it with the dummy) and retrigger it.
+		 *
+		 * This is necessary since those interrupts may otherwise
+		 * never be processed, at least not until this CPU restores
+		 * its CPPR.
+		 *
+		 * This is in theory racy vs. HW adding new interrupts to
+		 * the queue. In practice this works because the interesting
+		 * cases are when the guest has done a set_xive() to move the
+		 * interrupt away, which flushes the xive, followed by the
+		 * target CPU doing a H_CPPR. So any new interrupt coming into
+		 * the queue must still be routed to us and isn't a source
+		 * of concern.
+		 */
+		xive_vm_scan_for_rerouted_irqs(xive, xc);
+	}
+
+	/* Apply new CPPR */
+	xc->hw_cppr = cppr;
+	__raw_writeb(cppr, xive_tima + TM_QW1_OS + TM_CPPR);
+
+	return H_SUCCESS;
+}
+
+static int xive_vm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+{
+	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct xive_irq_data *xd;
+	u8 new_cppr = xirr >> 24;
+	u32 irq = xirr & 0x00ffffff, hw_num;
+	u16 src;
+	int rc = 0;
+
+	pr_devel("H_EOI(xirr=%08lx)\n", xirr);
+
+	xc->stat_vm_h_eoi++;
+
+	xc->cppr = xive_prio_from_guest(new_cppr);
+
+	/*
+	 * IPIs are synthesized from MFRR and thus don't need
+	 * any special EOI handling. The underlying interrupt
+	 * used to signal MFRR changes is EOId when fetched from
+	 * the queue.
+	 */
+	if (irq == XICS_IPI || irq == 0) {
+		/*
+		 * This barrier orders the setting of xc->cppr vs.
+		 * subsequent test of xc->mfrr done inside
+		 * scan_interrupts and push_pending_to_hw
+		 */
+		smp_mb();
+		goto bail;
+	}
+
+	/* Find interrupt source */
+	sb = kvmppc_xive_find_source(xive, irq, &src);
+	if (!sb) {
+		pr_devel(" source not found !\n");
+		rc = H_PARAMETER;
+		/* Same as above */
+		smp_mb();
+		goto bail;
+	}
+	state = &sb->irq_state[src];
+	kvmppc_xive_select_irq(state, &hw_num, &xd);
+
+	state->in_eoi = true;
+
+	/*
+	 * This barrier orders both the setting of in_eoi above vs. the
+	 * subsequent test of guest_priority, and the setting
+	 * of xc->cppr vs. the subsequent test of xc->mfrr done inside
+	 * scan_interrupts and push_pending_to_hw
+	 */
+	smp_mb();
+
+again:
+	if (state->guest_priority == MASKED) {
+		arch_spin_lock(&sb->lock);
+		if (state->guest_priority != MASKED) {
+			arch_spin_unlock(&sb->lock);
+			goto again;
+		}
+		pr_devel(" EOI on saved P...\n");
+
+		/* Clear old_p, that will cause unmask to perform an EOI */
+		state->old_p = false;
+
+		arch_spin_unlock(&sb->lock);
+	} else {
+		pr_devel(" EOI on source...\n");
+
+		/* Perform EOI on the source */
+		xive_vm_source_eoi(hw_num, xd);
+
+		/* If it's an emulated LSI, check level and resend */
+		if (state->lsi && state->asserted)
+			__raw_writeq(0, __x_trig_page(xd));
+
+	}
+
+	/*
+	 * This barrier orders the above guest_priority check
+	 * and spin_lock/unlock with clearing in_eoi below.
+	 *
+	 * It also has to be a full mb() as it must ensure
+	 * the MMIOs done in source_eoi() are completed before
+	 * state->in_eoi is visible.
+	 */
+	mb();
+	state->in_eoi = false;
+bail:
+
+	/* Re-evaluate pending IRQs and update HW */
+	xive_vm_scan_interrupts(xc, xc->pending, scan_eoi);
+	xive_vm_push_pending_to_hw(xc);
+	pr_devel(" after scan pending=%02x\n", xc->pending);
+
+	/* Apply new CPPR */
+	xc->hw_cppr = xc->cppr;
+	__raw_writeb(xc->cppr, xive_tima + TM_QW1_OS + TM_CPPR);
+
+	return rc;
+}
+
+static int xive_vm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+			       unsigned long mfrr)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+
+	pr_devel("H_IPI(server=%08lx,mfrr=%ld)\n", server, mfrr);
+
+	xc->stat_vm_h_ipi++;
+
+	/* Find target */
+	vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
+	if (!vcpu)
+		return H_PARAMETER;
+	xc = vcpu->arch.xive_vcpu;
+
+	/* Locklessly write over MFRR */
+	xc->mfrr = mfrr;
+
+	/*
+	 * The load of xc->cppr below and the subsequent MMIO store
+	 * to the IPI must happen after the above mfrr update is
+	 * globally visible so that:
+	 *
+	 * - we synchronize with another CPU doing an H_EOI or an H_CPPR
+	 *   that updates xc->cppr and then reads xc->mfrr, and
+	 *
+	 * - the target of the IPI sees the xc->mfrr update.
+	 */
+	mb();
+
+	/* Shoot the IPI if more favored than the target cppr */
+	if (mfrr < xc->cppr)
+		__raw_writeq(0, __x_trig_page(&xc->vp_ipi_data));
+
+	return H_SUCCESS;
+}
 
 /*
  * We leave a gap of a couple of interrupts in the queue to
diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c
deleted file mode 100644
index b0015e05d99a..000000000000
--- a/arch/powerpc/kvm/book3s_xive_template.c
+++ /dev/null
@@ -1,636 +0,0 @@ 
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright 2017 Benjamin Herrenschmidt, IBM Corporation
- */
-
-/* File to be included by other .c files */
-
-#define XGLUE(a,b) a##b
-#define GLUE(a,b) XGLUE(a,b)
-
-/* Dummy interrupt used when taking interrupts out of a queue in H_CPPR */
-#define XICS_DUMMY	1
-
-static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc)
-{
-	u8 cppr;
-	u16 ack;
-
-	/*
-	 * Ensure any previous store to CPPR is ordered vs.
-	 * the subsequent loads from PIPR or ACK.
-	 */
-	eieio();
-
-	/* Perform the acknowledge OS to register cycle. */
-	ack = be16_to_cpu(__x_readw(__x_tima + TM_SPC_ACK_OS_REG));
-
-	/* Synchronize subsequent queue accesses */
-	mb();
-
-	/* XXX Check grouping level */
-
-	/* Anything ? */
-	if (!((ack >> 8) & TM_QW1_NSR_EO))
-		return;
-
-	/* Grab CPPR of the most favored pending interrupt */
-	cppr = ack & 0xff;
-	if (cppr < 8)
-		xc->pending |= 1 << cppr;
-
-#ifdef XIVE_RUNTIME_CHECKS
-	/* Check consistency */
-	if (cppr >= xc->hw_cppr)
-		pr_warn("KVM-XIVE: CPU %d odd ack CPPR, got %d at %d\n",
-			smp_processor_id(), cppr, xc->hw_cppr);
-#endif
-
-	/*
-	 * Update our image of the HW CPPR. We don't yet modify
-	 * xc->cppr, this will be done as we scan for interrupts
-	 * in the queues.
-	 */
-	xc->hw_cppr = cppr;
-}
-
-static u8 GLUE(X_PFX,esb_load)(struct xive_irq_data *xd, u32 offset)
-{
-	u64 val;
-
-	if (offset == XIVE_ESB_SET_PQ_10 && xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
-		offset |= XIVE_ESB_LD_ST_MO;
-
-	val =__x_readq(__x_eoi_page(xd) + offset);
-#ifdef __LITTLE_ENDIAN__
-	val >>= 64-8;
-#endif
-	return (u8)val;
-}
-
-
-static void GLUE(X_PFX,source_eoi)(u32 hw_irq, struct xive_irq_data *xd)
-{
-	/* If the XIVE supports the new "store EOI facility, use it */
-	if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
-		__x_writeq(0, __x_eoi_page(xd) + XIVE_ESB_STORE_EOI);
-	else if (xd->flags & XIVE_IRQ_FLAG_LSI) {
-		/*
-		 * For LSIs the HW EOI cycle is used rather than PQ bits,
-		 * as they are automatically re-triggred in HW when still
-		 * pending.
-		 */
-		__x_readq(__x_eoi_page(xd) + XIVE_ESB_LOAD_EOI);
-	} else {
-		uint64_t eoi_val;
-
-		/*
-		 * Otherwise for EOI, we use the special MMIO that does
-		 * a clear of both P and Q and returns the old Q,
-		 * except for LSIs where we use the "EOI cycle" special
-		 * load.
-		 *
-		 * This allows us to then do a re-trigger if Q was set
-		 * rather than synthetizing an interrupt in software
-		 */
-		eoi_val = GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_00);
-
-		/* Re-trigger if needed */
-		if ((eoi_val & 1) && __x_trig_page(xd))
-			__x_writeq(0, __x_trig_page(xd));
-	}
-}
-
-enum {
-	scan_fetch,
-	scan_poll,
-	scan_eoi,
-};
-
-static u32 GLUE(X_PFX,scan_interrupts)(struct kvmppc_xive_vcpu *xc,
-				       u8 pending, int scan_type)
-{
-	u32 hirq = 0;
-	u8 prio = 0xff;
-
-	/* Find highest pending priority */
-	while ((xc->mfrr != 0xff || pending != 0) && hirq == 0) {
-		struct xive_q *q;
-		u32 idx, toggle;
-		__be32 *qpage;
-
-		/*
-		 * If pending is 0 this will return 0xff which is what
-		 * we want
-		 */
-		prio = ffs(pending) - 1;
-
-		/* Don't scan past the guest cppr */
-		if (prio >= xc->cppr || prio > 7) {
-			if (xc->mfrr < xc->cppr) {
-				prio = xc->mfrr;
-				hirq = XICS_IPI;
-			}
-			break;
-		}
-
-		/* Grab queue and pointers */
-		q = &xc->queues[prio];
-		idx = q->idx;
-		toggle = q->toggle;
-
-		/*
-		 * Snapshot the queue page. The test further down for EOI
-		 * must use the same "copy" that was used by __xive_read_eq
-		 * since qpage can be set concurrently and we don't want
-		 * to miss an EOI.
-		 */
-		qpage = READ_ONCE(q->qpage);
-
-skip_ipi:
-		/*
-		 * Try to fetch from the queue. Will return 0 for a
-		 * non-queueing priority (ie, qpage = 0).
-		 */
-		hirq = __xive_read_eq(qpage, q->msk, &idx, &toggle);
-
-		/*
-		 * If this was a signal for an MFFR change done by
-		 * H_IPI we skip it. Additionally, if we were fetching
-		 * we EOI it now, thus re-enabling reception of a new
-		 * such signal.
-		 *
-		 * We also need to do that if prio is 0 and we had no
-		 * page for the queue. In this case, we have non-queued
-		 * IPI that needs to be EOId.
-		 *
-		 * This is safe because if we have another pending MFRR
-		 * change that wasn't observed above, the Q bit will have
-		 * been set and another occurrence of the IPI will trigger.
-		 */
-		if (hirq == XICS_IPI || (prio == 0 && !qpage)) {
-			if (scan_type == scan_fetch) {
-				GLUE(X_PFX,source_eoi)(xc->vp_ipi,
-						       &xc->vp_ipi_data);
-				q->idx = idx;
-				q->toggle = toggle;
-			}
-			/* Loop back on same queue with updated idx/toggle */
-#ifdef XIVE_RUNTIME_CHECKS
-			WARN_ON(hirq && hirq != XICS_IPI);
-#endif
-			if (hirq)
-				goto skip_ipi;
-		}
-
-		/* If it's the dummy interrupt, continue searching */
-		if (hirq == XICS_DUMMY)
-			goto skip_ipi;
-
-		/* Clear the pending bit if the queue is now empty */
-		if (!hirq) {
-			pending &= ~(1 << prio);
-
-			/*
-			 * Check if the queue count needs adjusting due to
-			 * interrupts being moved away.
-			 */
-			if (atomic_read(&q->pending_count)) {
-				int p = atomic_xchg(&q->pending_count, 0);
-				if (p) {
-#ifdef XIVE_RUNTIME_CHECKS
-					WARN_ON(p > atomic_read(&q->count));
-#endif
-					atomic_sub(p, &q->count);
-				}
-			}
-		}
-
-		/*
-		 * If the most favoured prio we found pending is less
-		 * favored (or equal) than a pending IPI, we return
-		 * the IPI instead.
-		 */
-		if (prio >= xc->mfrr && xc->mfrr < xc->cppr) {
-			prio = xc->mfrr;
-			hirq = XICS_IPI;
-			break;
-		}
-
-		/* If fetching, update queue pointers */
-		if (scan_type == scan_fetch) {
-			q->idx = idx;
-			q->toggle = toggle;
-		}
-	}
-
-	/* If we are just taking a "peek", do nothing else */
-	if (scan_type == scan_poll)
-		return hirq;
-
-	/* Update the pending bits */
-	xc->pending = pending;
-
-	/*
-	 * If this is an EOI that's it, no CPPR adjustment done here,
-	 * all we needed was cleanup the stale pending bits and check
-	 * if there's anything left.
-	 */
-	if (scan_type == scan_eoi)
-		return hirq;
-
-	/*
-	 * If we found an interrupt, adjust what the guest CPPR should
-	 * be as if we had just fetched that interrupt from HW.
-	 *
-	 * Note: This can only make xc->cppr smaller as the previous
-	 * loop will only exit with hirq != 0 if prio is lower than
-	 * the current xc->cppr. Thus we don't need to re-check xc->mfrr
-	 * for pending IPIs.
-	 */
-	if (hirq)
-		xc->cppr = prio;
-	/*
-	 * If it was an IPI the HW CPPR might have been lowered too much
-	 * as the HW interrupt we use for IPIs is routed to priority 0.
-	 *
-	 * We re-sync it here.
-	 */
-	if (xc->cppr != xc->hw_cppr) {
-		xc->hw_cppr = xc->cppr;
-		__x_writeb(xc->cppr, __x_tima + TM_QW1_OS + TM_CPPR);
-	}
-
-	return hirq;
-}
-
-X_STATIC unsigned long GLUE(X_PFX,h_xirr)(struct kvm_vcpu *vcpu)
-{
-	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
-	u8 old_cppr;
-	u32 hirq;
-
-	pr_devel("H_XIRR\n");
-
-	xc->GLUE(X_STAT_PFX,h_xirr)++;
-
-	/* First collect pending bits from HW */
-	GLUE(X_PFX,ack_pending)(xc);
-
-	pr_devel(" new pending=0x%02x hw_cppr=%d cppr=%d\n",
-		 xc->pending, xc->hw_cppr, xc->cppr);
-
-	/* Grab previous CPPR and reverse map it */
-	old_cppr = xive_prio_to_guest(xc->cppr);
-
-	/* Scan for actual interrupts */
-	hirq = GLUE(X_PFX,scan_interrupts)(xc, xc->pending, scan_fetch);
-
-	pr_devel(" got hirq=0x%x hw_cppr=%d cppr=%d\n",
-		 hirq, xc->hw_cppr, xc->cppr);
-
-#ifdef XIVE_RUNTIME_CHECKS
-	/* That should never hit */
-	if (hirq & 0xff000000)
-		pr_warn("XIVE: Weird guest interrupt number 0x%08x\n", hirq);
-#endif
-
-	/*
-	 * XXX We could check if the interrupt is masked here and
-	 * filter it. If we chose to do so, we would need to do:
-	 *
-	 *    if (masked) {
-	 *        lock();
-	 *        if (masked) {
-	 *            old_Q = true;
-	 *            hirq = 0;
-	 *        }
-	 *        unlock();
-	 *    }
-	 */
-
-	/* Return interrupt and old CPPR in GPR4 */
-	vcpu->arch.regs.gpr[4] = hirq | (old_cppr << 24);
-
-	return H_SUCCESS;
-}
-
-X_STATIC unsigned long GLUE(X_PFX,h_ipoll)(struct kvm_vcpu *vcpu, unsigned long server)
-{
-	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
-	u8 pending = xc->pending;
-	u32 hirq;
-
-	pr_devel("H_IPOLL(server=%ld)\n", server);
-
-	xc->GLUE(X_STAT_PFX,h_ipoll)++;
-
-	/* Grab the target VCPU if not the current one */
-	if (xc->server_num != server) {
-		vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
-		if (!vcpu)
-			return H_PARAMETER;
-		xc = vcpu->arch.xive_vcpu;
-
-		/* Scan all priorities */
-		pending = 0xff;
-	} else {
-		/* Grab pending interrupt if any */
-		__be64 qw1 = __x_readq(__x_tima + TM_QW1_OS);
-		u8 pipr = be64_to_cpu(qw1) & 0xff;
-		if (pipr < 8)
-			pending |= 1 << pipr;
-	}
-
-	hirq = GLUE(X_PFX,scan_interrupts)(xc, pending, scan_poll);
-
-	/* Return interrupt and old CPPR in GPR4 */
-	vcpu->arch.regs.gpr[4] = hirq | (xc->cppr << 24);
-
-	return H_SUCCESS;
-}
-
-static void GLUE(X_PFX,push_pending_to_hw)(struct kvmppc_xive_vcpu *xc)
-{
-	u8 pending, prio;
-
-	pending = xc->pending;
-	if (xc->mfrr != 0xff) {
-		if (xc->mfrr < 8)
-			pending |= 1 << xc->mfrr;
-		else
-			pending |= 0x80;
-	}
-	if (!pending)
-		return;
-	prio = ffs(pending) - 1;
-
-	__x_writeb(prio, __x_tima + TM_SPC_SET_OS_PENDING);
-}
-
-static void GLUE(X_PFX,scan_for_rerouted_irqs)(struct kvmppc_xive *xive,
-					       struct kvmppc_xive_vcpu *xc)
-{
-	unsigned int prio;
-
-	/* For each priority that is now masked */
-	for (prio = xc->cppr; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
-		struct xive_q *q = &xc->queues[prio];
-		struct kvmppc_xive_irq_state *state;
-		struct kvmppc_xive_src_block *sb;
-		u32 idx, toggle, entry, irq, hw_num;
-		struct xive_irq_data *xd;
-		__be32 *qpage;
-		u16 src;
-
-		idx = q->idx;
-		toggle = q->toggle;
-		qpage = READ_ONCE(q->qpage);
-		if (!qpage)
-			continue;
-
-		/* For each interrupt in the queue */
-		for (;;) {
-			entry = be32_to_cpup(qpage + idx);
-
-			/* No more ? */
-			if ((entry >> 31) == toggle)
-				break;
-			irq = entry & 0x7fffffff;
-
-			/* Skip dummies and IPIs */
-			if (irq == XICS_DUMMY || irq == XICS_IPI)
-				goto next;
-			sb = kvmppc_xive_find_source(xive, irq, &src);
-			if (!sb)
-				goto next;
-			state = &sb->irq_state[src];
-
-			/* Has it been rerouted ? */
-			if (xc->server_num == state->act_server)
-				goto next;
-
-			/*
-			 * Allright, it *has* been re-routed, kill it from
-			 * the queue.
-			 */
-			qpage[idx] = cpu_to_be32((entry & 0x80000000) | XICS_DUMMY);
-
-			/* Find the HW interrupt */
-			kvmppc_xive_select_irq(state, &hw_num, &xd);
-
-			/* If it's not an LSI, set PQ to 11 the EOI will force a resend */
-			if (!(xd->flags & XIVE_IRQ_FLAG_LSI))
-				GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_11);
-
-			/* EOI the source */
-			GLUE(X_PFX,source_eoi)(hw_num, xd);
-
-		next:
-			idx = (idx + 1) & q->msk;
-			if (idx == 0)
-				toggle ^= 1;
-		}
-	}
-}
-
-X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
-{
-	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
-	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
-	u8 old_cppr;
-
-	pr_devel("H_CPPR(cppr=%ld)\n", cppr);
-
-	xc->GLUE(X_STAT_PFX,h_cppr)++;
-
-	/* Map CPPR */
-	cppr = xive_prio_from_guest(cppr);
-
-	/* Remember old and update SW state */
-	old_cppr = xc->cppr;
-	xc->cppr = cppr;
-
-	/*
-	 * Order the above update of xc->cppr with the subsequent
-	 * read of xc->mfrr inside push_pending_to_hw()
-	 */
-	smp_mb();
-
-	if (cppr > old_cppr) {
-		/*
-		 * We are masking less, we need to look for pending things
-		 * to deliver and set VP pending bits accordingly to trigger
-		 * a new interrupt otherwise we might miss MFRR changes for
-		 * which we have optimized out sending an IPI signal.
-		 */
-		GLUE(X_PFX,push_pending_to_hw)(xc);
-	} else {
-		/*
-		 * We are masking more, we need to check the queue for any
-		 * interrupt that has been routed to another CPU, take
-		 * it out (replace it with the dummy) and retrigger it.
-		 *
-		 * This is necessary since those interrupts may otherwise
-		 * never be processed, at least not until this CPU restores
-		 * its CPPR.
-		 *
-		 * This is in theory racy vs. HW adding new interrupts to
-		 * the queue. In practice this works because the interesting
-		 * cases are when the guest has done a set_xive() to move the
-		 * interrupt away, which flushes the xive, followed by the
-		 * target CPU doing a H_CPPR. So any new interrupt coming into
-		 * the queue must still be routed to us and isn't a source
-		 * of concern.
-		 */
-		GLUE(X_PFX,scan_for_rerouted_irqs)(xive, xc);
-	}
-
-	/* Apply new CPPR */
-	xc->hw_cppr = cppr;
-	__x_writeb(cppr, __x_tima + TM_QW1_OS + TM_CPPR);
-
-	return H_SUCCESS;
-}
-
-X_STATIC int GLUE(X_PFX,h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr)
-{
-	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
-	struct kvmppc_xive_src_block *sb;
-	struct kvmppc_xive_irq_state *state;
-	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
-	struct xive_irq_data *xd;
-	u8 new_cppr = xirr >> 24;
-	u32 irq = xirr & 0x00ffffff, hw_num;
-	u16 src;
-	int rc = 0;
-
-	pr_devel("H_EOI(xirr=%08lx)\n", xirr);
-
-	xc->GLUE(X_STAT_PFX,h_eoi)++;
-
-	xc->cppr = xive_prio_from_guest(new_cppr);
-
-	/*
-	 * IPIs are synthetized from MFRR and thus don't need
-	 * any special EOI handling. The underlying interrupt
-	 * used to signal MFRR changes is EOId when fetched from
-	 * the queue.
-	 */
-	if (irq == XICS_IPI || irq == 0) {
-		/*
-		 * This barrier orders the setting of xc->cppr vs.
-		 * subsquent test of xc->mfrr done inside
-		 * scan_interrupts and push_pending_to_hw
-		 */
-		smp_mb();
-		goto bail;
-	}
-
-	/* Find interrupt source */
-	sb = kvmppc_xive_find_source(xive, irq, &src);
-	if (!sb) {
-		pr_devel(" source not found !\n");
-		rc = H_PARAMETER;
-		/* Same as above */
-		smp_mb();
-		goto bail;
-	}
-	state = &sb->irq_state[src];
-	kvmppc_xive_select_irq(state, &hw_num, &xd);
-
-	state->in_eoi = true;
-
-	/*
-	 * This barrier orders both setting of in_eoi above vs,
-	 * subsequent test of guest_priority, and the setting
-	 * of xc->cppr vs. subsquent test of xc->mfrr done inside
-	 * scan_interrupts and push_pending_to_hw
-	 */
-	smp_mb();
-
-again:
-	if (state->guest_priority == MASKED) {
-		arch_spin_lock(&sb->lock);
-		if (state->guest_priority != MASKED) {
-			arch_spin_unlock(&sb->lock);
-			goto again;
-		}
-		pr_devel(" EOI on saved P...\n");
-
-		/* Clear old_p, that will cause unmask to perform an EOI */
-		state->old_p = false;
-
-		arch_spin_unlock(&sb->lock);
-	} else {
-		pr_devel(" EOI on source...\n");
-
-		/* Perform EOI on the source */
-		GLUE(X_PFX,source_eoi)(hw_num, xd);
-
-		/* If it's an emulated LSI, check level and resend */
-		if (state->lsi && state->asserted)
-			__x_writeq(0, __x_trig_page(xd));
-
-	}
-
-	/*
-	 * This barrier orders the above guest_priority check
-	 * and spin_lock/unlock with clearing in_eoi below.
-	 *
-	 * It also has to be a full mb() as it must ensure
-	 * the MMIOs done in source_eoi() are completed before
-	 * state->in_eoi is visible.
-	 */
-	mb();
-	state->in_eoi = false;
-bail:
-
-	/* Re-evaluate pending IRQs and update HW */
-	GLUE(X_PFX,scan_interrupts)(xc, xc->pending, scan_eoi);
-	GLUE(X_PFX,push_pending_to_hw)(xc);
-	pr_devel(" after scan pending=%02x\n", xc->pending);
-
-	/* Apply new CPPR */
-	xc->hw_cppr = xc->cppr;
-	__x_writeb(xc->cppr, __x_tima + TM_QW1_OS + TM_CPPR);
-
-	return rc;
-}
-
-X_STATIC int GLUE(X_PFX,h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
-			       unsigned long mfrr)
-{
-	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
-
-	pr_devel("H_IPI(server=%08lx,mfrr=%ld)\n", server, mfrr);
-
-	xc->GLUE(X_STAT_PFX,h_ipi)++;
-
-	/* Find target */
-	vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
-	if (!vcpu)
-		return H_PARAMETER;
-	xc = vcpu->arch.xive_vcpu;
-
-	/* Locklessly write over MFRR */
-	xc->mfrr = mfrr;
-
-	/*
-	 * The load of xc->cppr below and the subsequent MMIO store
-	 * to the IPI must happen after the above mfrr update is
-	 * globally visible so that:
-	 *
-	 * - Synchronize with another CPU doing an H_EOI or a H_CPPR
-	 *   updating xc->cppr then reading xc->mfrr.
-	 *
-	 * - The target of the IPI sees the xc->mfrr update
-	 */
-	mb();
-
-	/* Shoot the IPI if most favored than target cppr */
-	if (mfrr < xc->cppr)
-		__x_writeq(0, __x_trig_page(&xc->vp_ipi_data));
-
-	return H_SUCCESS;
-}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 44d74bfe05df..5003563ca38f 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1803,11 +1803,11 @@  hcall_real_table:
 	.long	0		/* 0x5c */
 	.long	0		/* 0x60 */
 #ifdef CONFIG_KVM_XICS
-	.long	DOTSYM(kvmppc_rm_h_eoi) - hcall_real_table
-	.long	DOTSYM(kvmppc_rm_h_cppr) - hcall_real_table
-	.long	DOTSYM(kvmppc_rm_h_ipi) - hcall_real_table
-	.long	DOTSYM(kvmppc_rm_h_ipoll) - hcall_real_table
-	.long	DOTSYM(kvmppc_rm_h_xirr) - hcall_real_table
+	.long	DOTSYM(xics_rm_h_eoi) - hcall_real_table
+	.long	DOTSYM(xics_rm_h_cppr) - hcall_real_table
+	.long	DOTSYM(xics_rm_h_ipi) - hcall_real_table
+	.long	0		/* 0x70 - H_IPOLL */
+	.long	DOTSYM(xics_rm_h_xirr) - hcall_real_table
 #else
 	.long	0		/* 0x64 - H_EOI */
 	.long	0		/* 0x68 - H_CPPR */
@@ -1977,7 +1977,7 @@  hcall_real_table:
 	.long	0		/* 0x2f4 */
 	.long	0		/* 0x2f8 */
 #ifdef CONFIG_KVM_XICS
-	.long	DOTSYM(kvmppc_rm_h_xirr_x) - hcall_real_table
+	.long	DOTSYM(xics_rm_h_xirr_x) - hcall_real_table
 #else
 	.long	0		/* 0x2fc - H_XIRR_X*/
 #endif