Message ID | 1594888333-9370-1-git-send-email-linuxram@us.ibm.com |
---|---|
State | RFC |
Headers | show |
Series | [RFC] powerpc/pseries/svm: capture instruction faulting on MMIO access, in sprg0 register | expand |
Le 16/07/2020 à 10:32, Ram Pai a écrit : > An instruction accessing a mmio address, generates a HDSI fault. This fault is > appropriately handled by the Hypervisor. However in the case of secureVMs, the > fault is delivered to the ultravisor. > > Unfortunately the Ultravisor has no correct-way to fetch the faulting > instruction. The PEF architecture does not allow Ultravisor to enable MMU > translation. Walking the two level page table to read the instruction can race > with other vcpus modifying the SVM's process scoped page table. > > This problem can be correctly solved with some help from the kernel. > > Capture the faulting instruction in SPRG0 register, before executing the > faulting instruction. This enables the ultravisor to easily procure the > faulting instruction and emulate it. > > Signed-off-by: Ram Pai <linuxram@us.ibm.com> > --- > arch/powerpc/include/asm/io.h | 85 ++++++++++++++++++++++++++++++++++++++----- > 1 file changed, 75 insertions(+), 10 deletions(-) > > diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h > index 635969b..7ef663d 100644 > --- a/arch/powerpc/include/asm/io.h > +++ b/arch/powerpc/include/asm/io.h > @@ -35,6 +35,7 @@ > #include <asm/mmu.h> > #include <asm/ppc_asm.h> > #include <asm/pgtable.h> > +#include <asm/svm.h> > > #define SIO_CONFIG_RA 0x398 > #define SIO_CONFIG_RD 0x399 > @@ -105,34 +106,98 @@ > static inline u##size name(const volatile u##size __iomem *addr) \ > { \ > u##size ret; \ > - __asm__ __volatile__("sync;"#insn" %0,%y1;twi 0,%0,0;isync" \ > - : "=r" (ret) : "Z" (*addr) : "memory"); \ > + if (is_secure_guest()) { \ > + __asm__ __volatile__("mfsprg0 %3;" \ > + "lnia %2;" \ > + "ld %2,12(%2);" \ > + "mtsprg0 %2;" \ > + "sync;" \ > + #insn" %0,%y1;" \ > + "twi 0,%0,0;" \ > + "isync;" \ > + "mtsprg0 %3" \ > + : "=r" (ret) \ > + : "Z" (*addr), "r" (0), "r" (0) \ I'm wondering if SPRG0 is restored to its original value. You're using the same register (r0) for parameters 2 and 3, so when doing lnia %2, you're overwriting the SPRG0 value you saved in r0 just earlier. It may be clearer to use explicit registers for %2 and %3 and to mark them as modified for the compiler. This applies to the other macros. Cheers, Laurent. > + : "memory"); \ > + } else { \ > + __asm__ __volatile__("sync;" \ > + #insn" %0,%y1;" \ > + "twi 0,%0,0;" \ > + "isync" \ > + : "=r" (ret) : "Z" (*addr) : "memory"); \ > + } \ > return ret; \ > } > > #define DEF_MMIO_OUT_X(name, size, insn) \ > static inline void name(volatile u##size __iomem *addr, u##size val) \ > { \ > - __asm__ __volatile__("sync;"#insn" %1,%y0" \ > - : "=Z" (*addr) : "r" (val) : "memory"); \ > - mmiowb_set_pending(); \ > + if (is_secure_guest()) { \ > + __asm__ __volatile__("mfsprg0 %3;" \ > + "lnia %2;" \ > + "ld %2,12(%2);" \ > + "mtsprg0 %2;" \ > + "sync;" \ > + #insn" %1,%y0;" \ > + "mtsprg0 %3" \ > + : "=Z" (*addr) \ > + : "r" (val), "r" (0), "r" (0) \ > + : "memory"); \ > + } else { \ > + __asm__ __volatile__("sync;" \ > + #insn" %1,%y0" \ > + : "=Z" (*addr) : "r" (val) : "memory"); \ > + mmiowb_set_pending(); \ > + } \ > } > > #define DEF_MMIO_IN_D(name, size, insn) \ > static inline u##size name(const volatile u##size __iomem *addr) \ > { \ > u##size ret; \ > - __asm__ __volatile__("sync;"#insn"%U1%X1 %0,%1;twi 0,%0,0;isync"\ > - : "=r" (ret) : "m" (*addr) : "memory"); \ > + if (is_secure_guest()) { \ > + __asm__ __volatile__("mfsprg0 %3;" \ > + "lnia %2;" \ > + "ld %2,12(%2);" \ > + "mtsprg0 %2;" \ > + "sync;" \ > + #insn"%U1%X1 %0,%1;" \ > + "twi 0,%0,0;" \ > + "isync;" \ > + "mtsprg0 %3" \ > + : "=r" (ret) \ > + : "m" (*addr), "r" (0), "r" (0) \ > + : "memory"); \ > + } else { \ > + __asm__ __volatile__("sync;" \ > + #insn"%U1%X1 %0,%1;" \ > + "twi 0,%0,0;" \ > + "isync" \ > + : "=r" (ret) : "m" (*addr) : "memory"); \ > + } \ > return ret; \ > } > > #define DEF_MMIO_OUT_D(name, size, insn) \ > static inline void name(volatile u##size __iomem *addr, u##size val) \ > { \ > - __asm__ __volatile__("sync;"#insn"%U0%X0 %1,%0" \ > - : "=m" (*addr) : "r" (val) : "memory"); \ > - mmiowb_set_pending(); \ > + if (is_secure_guest()) { \ > + __asm__ __volatile__("mfsprg0 %3;" \ > + "lnia %2;" \ > + "ld %2,12(%2);" \ > + "mtsprg0 %2;" \ > + "sync;" \ > + #insn"%U0%X0 %1,%0;" \ > + "mtsprg0 %3" \ > + : "=m" (*addr) \ > + : "r" (val), "r" (0), "r" (0) \ > + : "memory"); \ > + } else { \ > + __asm__ __volatile__("sync;" \ > + #insn"%U0%X0 %1,%0" \ > + : "=m" (*addr) : "r" (val) : "memory"); \ > + mmiowb_set_pending(); \ > + } \ > } > > DEF_MMIO_IN_D(in_8, 8, lbz); >
On Mon, Jul 20, 2020 at 11:39:56AM +0200, Laurent Dufour wrote: > Le 16/07/2020 à 10:32, Ram Pai a écrit : > >+ if (is_secure_guest()) { \ > >+ __asm__ __volatile__("mfsprg0 %3;" \ > >+ "lnia %2;" \ > >+ "ld %2,12(%2);" \ > >+ "mtsprg0 %2;" \ > >+ "sync;" \ > >+ #insn" %0,%y1;" \ > >+ "twi 0,%0,0;" \ > >+ "isync;" \ > >+ "mtsprg0 %3" \ > >+ : "=r" (ret) \ > >+ : "Z" (*addr), "r" (0), "r" (0) \ > > I'm wondering if SPRG0 is restored to its original value. > You're using the same register (r0) for parameters 2 and 3, so when doing > lnia %2, you're overwriting the SPRG0 value you saved in r0 just earlier. It is putting the value 0 in the registers the compiler chooses for operands 2 and 3. But operand 3 is written, while the asm says it is an input. It needs an earlyclobber as well. > It may be clearer to use explicit registers for %2 and %3 and to mark them > as modified for the compiler. That is not a good idea, imnsho. Segher
On Mon, Jul 20, 2020 at 03:10:41PM -0500, Segher Boessenkool wrote: > On Mon, Jul 20, 2020 at 11:39:56AM +0200, Laurent Dufour wrote: > > Le 16/07/2020 à 10:32, Ram Pai a écrit : > > >+ if (is_secure_guest()) { \ > > >+ __asm__ __volatile__("mfsprg0 %3;" \ > > >+ "lnia %2;" \ > > >+ "ld %2,12(%2);" \ > > >+ "mtsprg0 %2;" \ > > >+ "sync;" \ > > >+ #insn" %0,%y1;" \ > > >+ "twi 0,%0,0;" \ > > >+ "isync;" \ > > >+ "mtsprg0 %3" \ > > >+ : "=r" (ret) \ > > >+ : "Z" (*addr), "r" (0), "r" (0) \ > > > > I'm wondering if SPRG0 is restored to its original value. > > You're using the same register (r0) for parameters 2 and 3, so when doing > > lnia %2, you're overwriting the SPRG0 value you saved in r0 just earlier. > > It is putting the value 0 in the registers the compiler chooses for > operands 2 and 3. But operand 3 is written, while the asm says it is an > input. It needs an earlyclobber as well. > > > It may be clearer to use explicit registers for %2 and %3 and to mark them > > as modified for the compiler. > > That is not a good idea, imnsho. (The explicit register number part, I mean; operand 2 should be an output as well, yes.) Segher
Le 20/07/2020 à 22:24, Segher Boessenkool a écrit : > On Mon, Jul 20, 2020 at 03:10:41PM -0500, Segher Boessenkool wrote: >> On Mon, Jul 20, 2020 at 11:39:56AM +0200, Laurent Dufour wrote: >>> Le 16/07/2020 à 10:32, Ram Pai a écrit : >>>> + if (is_secure_guest()) { \ >>>> + __asm__ __volatile__("mfsprg0 %3;" \ >>>> + "lnia %2;" \ >>>> + "ld %2,12(%2);" \ >>>> + "mtsprg0 %2;" \ >>>> + "sync;" \ >>>> + #insn" %0,%y1;" \ >>>> + "twi 0,%0,0;" \ >>>> + "isync;" \ >>>> + "mtsprg0 %3" \ >>>> + : "=r" (ret) \ >>>> + : "Z" (*addr), "r" (0), "r" (0) \ >>> >>> I'm wondering if SPRG0 is restored to its original value. >>> You're using the same register (r0) for parameters 2 and 3, so when doing >>> lnia %2, you're overwriting the SPRG0 value you saved in r0 just earlier. >> >> It is putting the value 0 in the registers the compiler chooses for >> operands 2 and 3. But operand 3 is written, while the asm says it is an >> input. It needs an earlyclobber as well. Oh nice, I was not aware that compiler may choose registers this way. Good to know, thanks for the explanation. >>> It may be clearer to use explicit registers for %2 and %3 and to mark them >>> as modified for the compiler. >> >> That is not a good idea, imnsho. > > (The explicit register number part, I mean; operand 2 should be an > output as well, yes.) Sure if the compiler can choose the registers that's far better. Cheers, Laurent.
Excerpts from Ram Pai's message of July 16, 2020 6:32 pm: > An instruction accessing a mmio address, generates a HDSI fault. This fault is > appropriately handled by the Hypervisor. However in the case of secureVMs, the > fault is delivered to the ultravisor. Why not a ucall if you're paraultravizing it anyway? > > Unfortunately the Ultravisor has no correct-way to fetch the faulting > instruction. The PEF architecture does not allow Ultravisor to enable MMU > translation. Walking the two level page table to read the instruction can race > with other vcpus modifying the SVM's process scoped page table. > > This problem can be correctly solved with some help from the kernel. > > Capture the faulting instruction in SPRG0 register, before executing the > faulting instruction. This enables the ultravisor to easily procure the > faulting instruction and emulate it. > > Signed-off-by: Ram Pai <linuxram@us.ibm.com> > --- > arch/powerpc/include/asm/io.h | 85 ++++++++++++++++++++++++++++++++++++++----- > 1 file changed, 75 insertions(+), 10 deletions(-) > > diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h > index 635969b..7ef663d 100644 > --- a/arch/powerpc/include/asm/io.h > +++ b/arch/powerpc/include/asm/io.h > @@ -35,6 +35,7 @@ > #include <asm/mmu.h> > #include <asm/ppc_asm.h> > #include <asm/pgtable.h> > +#include <asm/svm.h> > > #define SIO_CONFIG_RA 0x398 > #define SIO_CONFIG_RD 0x399 > @@ -105,34 +106,98 @@ > static inline u##size name(const volatile u##size __iomem *addr) \ > { \ > u##size ret; \ > - __asm__ __volatile__("sync;"#insn" %0,%y1;twi 0,%0,0;isync" \ > - : "=r" (ret) : "Z" (*addr) : "memory"); \ > + if (is_secure_guest()) { \ > + __asm__ __volatile__("mfsprg0 %3;" \ > + "lnia %2;" \ > + "ld %2,12(%2);" \ > + "mtsprg0 %2;" \ > + "sync;" \ > + #insn" %0,%y1;" \ > + "twi 0,%0,0;" \ > + "isync;" \ > + "mtsprg0 %3" \ We prefer to use mtspr in new code, and the nia offset should be calculated with a label I think "(1f - .)(%2)" should work. SPRG usage is documented in arch/powerpc/include/asm/reg.h if this goes past RFC stage. Looks like SPRG0 probably could be used for this. Thanks, Nick
Ram Pai <linuxram@us.ibm.com> writes: > An instruction accessing a mmio address, generates a HDSI fault. This fault is > appropriately handled by the Hypervisor. However in the case of secureVMs, the > fault is delivered to the ultravisor. > > Unfortunately the Ultravisor has no correct-way to fetch the faulting > instruction. The PEF architecture does not allow Ultravisor to enable MMU > translation. Walking the two level page table to read the instruction can race > with other vcpus modifying the SVM's process scoped page table. You're trying to read the guest's kernel text IIUC, that mapping should be stable. Possibly permissions on it could change over time, but the virtual -> real mapping should not. > This problem can be correctly solved with some help from the kernel. > > Capture the faulting instruction in SPRG0 register, before executing the > faulting instruction. This enables the ultravisor to easily procure the > faulting instruction and emulate it. This is not something I'm going to merge. Sorry. cheers
On Wed, 2020-07-22 at 12:06 +1000, Michael Ellerman wrote: > Ram Pai <linuxram@us.ibm.com> writes: > > An instruction accessing a mmio address, generates a HDSI fault. This fault is > > appropriately handled by the Hypervisor. However in the case of secureVMs, the > > fault is delivered to the ultravisor. > > > > Unfortunately the Ultravisor has no correct-way to fetch the faulting > > instruction. The PEF architecture does not allow Ultravisor to enable MMU > > translation. Walking the two level page table to read the instruction can race > > with other vcpus modifying the SVM's process scoped page table. > > You're trying to read the guest's kernel text IIUC, that mapping should > be stable. Possibly permissions on it could change over time, but the > virtual -> real mapping should not. This will of course no longer work if userspace tries to access MMIO but as long as you are ok limiting MMIO usage to the kernel, that's one way. iirc MMIO emulation in KVM on powerpc already has that race because of HW TLB inval broadcast and it hasn't hurt anybody ... so far. > > This problem can be correctly solved with some help from the kernel. > > > > Capture the faulting instruction in SPRG0 register, before executing the > > faulting instruction. This enables the ultravisor to easily procure the > > faulting instruction and emulate it. > > This is not something I'm going to merge. Sorry. Another approach is to have the guest explicitly switch to using UV calls for MMIO. Cheers, Ben.
On Thu, Jul 16, 2020 at 01:32:13AM -0700, Ram Pai wrote: > An instruction accessing a mmio address, generates a HDSI fault. This fault is > appropriately handled by the Hypervisor. However in the case of secureVMs, the > fault is delivered to the ultravisor. > > Unfortunately the Ultravisor has no correct-way to fetch the faulting > instruction. The PEF architecture does not allow Ultravisor to enable MMU > translation. Walking the two level page table to read the instruction can race > with other vcpus modifying the SVM's process scoped page table. > > This problem can be correctly solved with some help from the kernel. > > Capture the faulting instruction in SPRG0 register, before executing the > faulting instruction. This enables the ultravisor to easily procure the > faulting instruction and emulate it. Just a comment on the approach of putting the instruction in SPRG0: these I/O accessors can be used in interrupt routines, which means that if these accessors are ever used with interrupts enabled, there is the possibility of an external interrupt occurring between the instruction that sets SPRG0 and the load/store instruction that faults. If the handler for that interrupt itself does an I/O access, it will overwrite SPRG0, corrupting the value set by the interrupted code. The choices to fix that would seem to be (a) disable interrupts around all I/O accesses, (b) have the accessor save and restore SPRG0, or (c) solve the problem another way, such as by doing a H_LOGICAL_CI_LOAD or H_LOGICAL_CI_STORE hypercall. Paul.
On Wed, Jul 22, 2020 at 03:02:32PM +1000, Paul Mackerras wrote: > On Thu, Jul 16, 2020 at 01:32:13AM -0700, Ram Pai wrote: > > An instruction accessing a mmio address, generates a HDSI fault. This fault is > > appropriately handled by the Hypervisor. However in the case of secureVMs, the > > fault is delivered to the ultravisor. > > > > Unfortunately the Ultravisor has no correct-way to fetch the faulting > > instruction. The PEF architecture does not allow Ultravisor to enable MMU > > translation. Walking the two level page table to read the instruction can race > > with other vcpus modifying the SVM's process scoped page table. > > > > This problem can be correctly solved with some help from the kernel. > > > > Capture the faulting instruction in SPRG0 register, before executing the > > faulting instruction. This enables the ultravisor to easily procure the > > faulting instruction and emulate it. > > Just a comment on the approach of putting the instruction in SPRG0: > these I/O accessors can be used in interrupt routines, which means > that if these accessors are ever used with interrupts enabled, there > is the possibility of an external interrupt occurring between the > instruction that sets SPRG0 and the load/store instruction that > faults. If the handler for that interrupt itself does an I/O access, > it will overwrite SPRG0, corrupting the value set by the interrupted > code. Acutally my proposed code restores the value of SPRG0 before returning back to the interrupted instruction. So here is the sequence. I think it works. (1) store sprg0 in register Rx (lets say srpg0 had 0xc. Rx now contains 0xc) (2) save faulting instruction address in sprg0 (lets say the value is 0xa. sprg0 will contain 0xa). (3) <----- interrupt arrives (4) store sprg0 in register Ry ( Ry now contains 0xa ) (5) save faulting instruction address in sprg0 (lets say the value is 0xb). (6) 0xb: execute faulting instruction (7) restore Ry into sprg0 ( sprg0 now contains 0xa ) (8) <-- return from interrupt (9) 0xa: execute faulting instruction (10) restore Rx into sprg0 (sprg0 now contains 0xc) > > The choices to fix that would seem to be (a) disable interrupts around > all I/O accesses, (b) have the accessor save and restore SPRG0, or (c) > solve the problem another way, such as by doing a H_LOGICAL_CI_LOAD > or H_LOGICAL_CI_STORE hypercall. Ok. Will explore (c). Thanks, RP
On Wed, Jul 22, 2020 at 12:42:05AM -0700, Ram Pai wrote: > On Wed, Jul 22, 2020 at 03:02:32PM +1000, Paul Mackerras wrote: > > On Thu, Jul 16, 2020 at 01:32:13AM -0700, Ram Pai wrote: > > > An instruction accessing a mmio address, generates a HDSI fault. This fault is > > > appropriately handled by the Hypervisor. However in the case of secureVMs, the > > > fault is delivered to the ultravisor. > > > > > > Unfortunately the Ultravisor has no correct-way to fetch the faulting > > > instruction. The PEF architecture does not allow Ultravisor to enable MMU > > > translation. Walking the two level page table to read the instruction can race > > > with other vcpus modifying the SVM's process scoped page table. > > > > > > This problem can be correctly solved with some help from the kernel. > > > > > > Capture the faulting instruction in SPRG0 register, before executing the > > > faulting instruction. This enables the ultravisor to easily procure the > > > faulting instruction and emulate it. > > > > Just a comment on the approach of putting the instruction in SPRG0: > > these I/O accessors can be used in interrupt routines, which means > > that if these accessors are ever used with interrupts enabled, there > > is the possibility of an external interrupt occurring between the > > instruction that sets SPRG0 and the load/store instruction that > > faults. If the handler for that interrupt itself does an I/O access, > > it will overwrite SPRG0, corrupting the value set by the interrupted > > code. > > Acutally my proposed code restores the value of SPRG0 before returning back to > the interrupted instruction. So here is the sequence. I think it works. > > (1) store sprg0 in register Rx (lets say srpg0 had 0xc. Rx now contains 0xc) > > (2) save faulting instruction address in sprg0 (lets say the value is 0xa. > sprg0 will contain 0xa). Small correction. sprg0 does not store the address of the faulting instruction. It stores the isntruction itself. Regardless, the code should work, I think. RP
On Wed, Jul 22, 2020 at 12:06:06PM +1000, Michael Ellerman wrote: > Ram Pai <linuxram@us.ibm.com> writes: > > An instruction accessing a mmio address, generates a HDSI fault. This fault is > > appropriately handled by the Hypervisor. However in the case of secureVMs, the > > fault is delivered to the ultravisor. > > > > Unfortunately the Ultravisor has no correct-way to fetch the faulting > > instruction. The PEF architecture does not allow Ultravisor to enable MMU > > translation. Walking the two level page table to read the instruction can race > > with other vcpus modifying the SVM's process scoped page table. > > You're trying to read the guest's kernel text IIUC, that mapping should > be stable. Possibly permissions on it could change over time, but the > virtual -> real mapping should not. Actually the code does not capture the address of the instruction in the sprg0 register. It captures the instruction itself. So should the mapping matter? > > > This problem can be correctly solved with some help from the kernel. > > > > Capture the faulting instruction in SPRG0 register, before executing the > > faulting instruction. This enables the ultravisor to easily procure the > > faulting instruction and emulate it. > > This is not something I'm going to merge. Sorry. Ok. Will consider other approaches. RP
Ram Pai <linuxram@us.ibm.com> writes: > On Wed, Jul 22, 2020 at 12:06:06PM +1000, Michael Ellerman wrote: >> Ram Pai <linuxram@us.ibm.com> writes: >> > An instruction accessing a mmio address, generates a HDSI fault. This fault is >> > appropriately handled by the Hypervisor. However in the case of secureVMs, the >> > fault is delivered to the ultravisor. >> > >> > Unfortunately the Ultravisor has no correct-way to fetch the faulting >> > instruction. The PEF architecture does not allow Ultravisor to enable MMU >> > translation. Walking the two level page table to read the instruction can race >> > with other vcpus modifying the SVM's process scoped page table. >> >> You're trying to read the guest's kernel text IIUC, that mapping should >> be stable. Possibly permissions on it could change over time, but the >> virtual -> real mapping should not. > > Actually the code does not capture the address of the instruction in the > sprg0 register. It captures the instruction itself. So should the mapping > matter? Sorry that was talking about reading the instruction by doing the page walk, not with this patch applied. cheers
Ram Pai <linuxram@us.ibm.com> writes: > On Wed, Jul 22, 2020 at 12:06:06PM +1000, Michael Ellerman wrote: >> Ram Pai <linuxram@us.ibm.com> writes: >> > An instruction accessing a mmio address, generates a HDSI fault. This fault is >> > appropriately handled by the Hypervisor. However in the case of secureVMs, the >> > fault is delivered to the ultravisor. >> > >> > Unfortunately the Ultravisor has no correct-way to fetch the faulting >> > instruction. The PEF architecture does not allow Ultravisor to enable MMU >> > translation. Walking the two level page table to read the instruction can race >> > with other vcpus modifying the SVM's process scoped page table. >> >> You're trying to read the guest's kernel text IIUC, that mapping should >> be stable. Possibly permissions on it could change over time, but the >> virtual -> real mapping should not. > > Actually the code does not capture the address of the instruction in the > sprg0 register. It captures the instruction itself. So should the mapping > matter? >> >> > This problem can be correctly solved with some help from the kernel. >> > >> > Capture the faulting instruction in SPRG0 register, before executing the >> > faulting instruction. This enables the ultravisor to easily procure the >> > faulting instruction and emulate it. >> >> This is not something I'm going to merge. Sorry. > > Ok. Will consider other approaches. To elaborate ... You've basically invented a custom ucall ABI. But a really strange one which takes an instruction as its first parameter in SPRG0, and then subsequent parameters in any GPR depending on what the instruction was. The UV should either emulate the instruction, which means the guest should not be expected to do anything other than execute the instruction. Or it should be done with a proper ucall that the guest explicitly makes with a well defined ABI. cheers
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 635969b..7ef663d 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -35,6 +35,7 @@ #include <asm/mmu.h> #include <asm/ppc_asm.h> #include <asm/pgtable.h> +#include <asm/svm.h> #define SIO_CONFIG_RA 0x398 #define SIO_CONFIG_RD 0x399 @@ -105,34 +106,98 @@ static inline u##size name(const volatile u##size __iomem *addr) \ { \ u##size ret; \ - __asm__ __volatile__("sync;"#insn" %0,%y1;twi 0,%0,0;isync" \ - : "=r" (ret) : "Z" (*addr) : "memory"); \ + if (is_secure_guest()) { \ + __asm__ __volatile__("mfsprg0 %3;" \ + "lnia %2;" \ + "ld %2,12(%2);" \ + "mtsprg0 %2;" \ + "sync;" \ + #insn" %0,%y1;" \ + "twi 0,%0,0;" \ + "isync;" \ + "mtsprg0 %3" \ + : "=r" (ret) \ + : "Z" (*addr), "r" (0), "r" (0) \ + : "memory"); \ + } else { \ + __asm__ __volatile__("sync;" \ + #insn" %0,%y1;" \ + "twi 0,%0,0;" \ + "isync" \ + : "=r" (ret) : "Z" (*addr) : "memory"); \ + } \ return ret; \ } #define DEF_MMIO_OUT_X(name, size, insn) \ static inline void name(volatile u##size __iomem *addr, u##size val) \ { \ - __asm__ __volatile__("sync;"#insn" %1,%y0" \ - : "=Z" (*addr) : "r" (val) : "memory"); \ - mmiowb_set_pending(); \ + if (is_secure_guest()) { \ + __asm__ __volatile__("mfsprg0 %3;" \ + "lnia %2;" \ + "ld %2,12(%2);" \ + "mtsprg0 %2;" \ + "sync;" \ + #insn" %1,%y0;" \ + "mtsprg0 %3" \ + : "=Z" (*addr) \ + : "r" (val), "r" (0), "r" (0) \ + : "memory"); \ + } else { \ + __asm__ __volatile__("sync;" \ + #insn" %1,%y0" \ + : "=Z" (*addr) : "r" (val) : "memory"); \ + mmiowb_set_pending(); \ + } \ } #define DEF_MMIO_IN_D(name, size, insn) \ static inline u##size name(const volatile u##size __iomem *addr) \ { \ u##size ret; \ - __asm__ __volatile__("sync;"#insn"%U1%X1 %0,%1;twi 0,%0,0;isync"\ - : "=r" (ret) : "m" (*addr) : "memory"); \ + if (is_secure_guest()) { \ + __asm__ __volatile__("mfsprg0 %3;" \ + "lnia %2;" \ + "ld %2,12(%2);" \ + "mtsprg0 %2;" \ + "sync;" \ + #insn"%U1%X1 %0,%1;" \ + "twi 0,%0,0;" \ + "isync;" \ + "mtsprg0 %3" \ + : "=r" (ret) \ + : "m" (*addr), "r" (0), "r" (0) \ + : "memory"); \ + } else { \ + __asm__ __volatile__("sync;" \ + #insn"%U1%X1 %0,%1;" \ + "twi 0,%0,0;" \ + "isync" \ + : "=r" (ret) : "m" (*addr) : "memory"); \ + } \ return ret; \ } #define DEF_MMIO_OUT_D(name, size, insn) \ static inline void name(volatile u##size __iomem *addr, u##size val) \ { \ - __asm__ __volatile__("sync;"#insn"%U0%X0 %1,%0" \ - : "=m" (*addr) : "r" (val) : "memory"); \ - mmiowb_set_pending(); \ + if (is_secure_guest()) { \ + __asm__ __volatile__("mfsprg0 %3;" \ + "lnia %2;" \ + "ld %2,12(%2);" \ + "mtsprg0 %2;" \ + "sync;" \ + #insn"%U0%X0 %1,%0;" \ + "mtsprg0 %3" \ + : "=m" (*addr) \ + : "r" (val), "r" (0), "r" (0) \ + : "memory"); \ + } else { \ + __asm__ __volatile__("sync;" \ + #insn"%U0%X0 %1,%0" \ + : "=m" (*addr) : "r" (val) : "memory"); \ + mmiowb_set_pending(); \ + } \ } DEF_MMIO_IN_D(in_8, 8, lbz);
An instruction accessing a mmio address, generates a HDSI fault. This fault is appropriately handled by the Hypervisor. However in the case of secureVMs, the fault is delivered to the ultravisor. Unfortunately the Ultravisor has no correct-way to fetch the faulting instruction. The PEF architecture does not allow Ultravisor to enable MMU translation. Walking the two level page table to read the instruction can race with other vcpus modifying the SVM's process scoped page table. This problem can be correctly solved with some help from the kernel. Capture the faulting instruction in SPRG0 register, before executing the faulting instruction. This enables the ultravisor to easily procure the faulting instruction and emulate it. Signed-off-by: Ram Pai <linuxram@us.ibm.com> --- arch/powerpc/include/asm/io.h | 85 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 75 insertions(+), 10 deletions(-)