Message ID | 1254744999-3158-4-git-send-email-Joakim.Tjernlund@transmode.se (mailing list archive) |
---|---|
State | Superseded, archived |
Headers | show |
On Mon, 2009-10-05 at 14:16 +0200, Joakim Tjernlund wrote: > Update the TLB asm to make proper use of _PAGE_DIRTY and _PAGE_ACCESSED. > Pros: > - I/D TLB Miss never needs to write to the linux pte. > - _PAGE_ACCESSED is only set on I/D TLB Error fixing accounting > - _PAGE_DIRTY is mapped to 0x100, the changed bit, and is set directly > when a page has been made dirty. Not sure here. You seem to still set those from asm. Is that necessary ? The approach I take on BookE is to simply not set these from asm, -and- (this is important) not set write permission if dirty is not set in the TLB miss and set no access permission at all when accessed is not set. This is important or we'll miss dirty updates which can be fatal. The C code in handle_pte_fault() will fixup missing access and dirty if necessary and flush. Also look at the 440 code, I think you could simplify your implementation using similar techniques, such as andc of PTE against requested bits etc... and thus maybe save a couple of insns. Cheers, Ben. > - Proper RO/RW mapping of user space. > - Free up 2 SW TLB bits in the linux pte(add back _PAGE_WRITETHRU ?) > Cons: > - 4 more instructions in I/D TLB Miss, but since the linux pte is > not written anymore, it should still be a win. 
> --- > arch/powerpc/include/asm/pte-8xx.h | 9 +- > arch/powerpc/kernel/head_8xx.S | 163 ++++++++++++++++++++++++++---------- > 2 files changed, 122 insertions(+), 50 deletions(-) > > diff --git a/arch/powerpc/include/asm/pte-8xx.h b/arch/powerpc/include/asm/pte-8xx.h > index 8c6e312..af541a2 100644 > --- a/arch/powerpc/include/asm/pte-8xx.h > +++ b/arch/powerpc/include/asm/pte-8xx.h > @@ -32,22 +32,21 @@ > #define _PAGE_FILE 0x0002 /* when !present: nonlinear file mapping */ > #define _PAGE_NO_CACHE 0x0002 /* I: cache inhibit */ > #define _PAGE_SHARED 0x0004 /* No ASID (context) compare */ > +#define _PAGE_DIRTY 0x0100 /* C: page changed */ > > /* These five software bits must be masked out when the entry is loaded > * into the TLB. > */ > #define _PAGE_EXEC 0x0008 /* software: i-cache coherency required */ > #define _PAGE_GUARDED 0x0010 /* software: guarded access */ > -#define _PAGE_DIRTY 0x0020 /* software: page changed */ > -#define _PAGE_RW 0x0040 /* software: user write access allowed */ > -#define _PAGE_ACCESSED 0x0080 /* software: page referenced */ > +#define _PAGE_USER 0x0020 /* software: User space access */ > > /* Setting any bits in the nibble with the follow two controls will > * require a TLB exception handler change. It is assumed unused bits > * are always zero. 
> */ > -#define _PAGE_HWWRITE 0x0100 /* h/w write enable: never set in Linux PTE */ > -#define _PAGE_USER 0x0800 /* One of the PP bits, the other is USER&~RW */ > +#define _PAGE_RW 0x0400 /* lsb PP bits */ > +#define _PAGE_ACCESSED 0x0800 /* msb PP bits */ > > #define _PMD_PRESENT 0x0001 > #define _PMD_BAD 0x0ff0 > diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S > index 118bb05..b1f72d9 100644 > --- a/arch/powerpc/kernel/head_8xx.S > +++ b/arch/powerpc/kernel/head_8xx.S > @@ -333,21 +333,15 @@ InstructionTLBMiss: > mfspr r11, SPRN_MD_TWC /* ....and get the pte address */ > lwz r10, 0(r11) /* Get the pte */ > > -#ifdef CONFIG_SWAP > - /* do not set the _PAGE_ACCESSED bit of a non-present page */ > - andi. r11, r10, _PAGE_PRESENT > - beq 4f > - ori r10, r10, _PAGE_ACCESSED > - mfspr r11, SPRN_MD_TWC /* get the pte address again */ > - stw r10, 0(r11) > -4: > -#else > - ori r10, r10, _PAGE_ACCESSED > - stw r10, 0(r11) > -#endif > - > + andi. r11, r10, _PAGE_USER | _PAGE_ACCESSED > + cmpwi cr0, r11, _PAGE_USER | _PAGE_ACCESSED > + beq+ cr0, 5f /* branch if access allowed */ > + rlwinm r10, r10, 0, 22, 19 /* r10 &= ~(_PAGE_ACCESSED | _PAGE_RW) */ > + b 6f > +5: xori r10, r10, _PAGE_RW /* invert RW bit */ > +6: > /* The Linux PTE won't go exactly into the MMU TLB. > - * Software indicator bits 21, 22 and 28 must be clear. > + * Software indicator bit 28 must be clear. > * Software indicator bits 24, 25, 26, and 27 must be > * set. All other Linux PTE bits control the behavior > * of the MMU. > @@ -409,21 +403,15 @@ DataStoreTLBMiss: > DO_8xx_CPU6(0x3b80, r3) > mtspr SPRN_MD_TWC, r11 > > -#ifdef CONFIG_SWAP > - /* do not set the _PAGE_ACCESSED bit of a non-present page */ > - andi. r11, r10, _PAGE_PRESENT > - beq 4f > - ori r10, r10, _PAGE_ACCESSED > -4: > - /* and update pte in table */ > -#else > - ori r10, r10, _PAGE_ACCESSED > -#endif > - mfspr r11, SPRN_MD_TWC /* get the pte address again */ > - stw r10, 0(r11) > - > + andi. 
r11, r10, _PAGE_USER | _PAGE_ACCESSED > + cmpwi cr0, r11, _PAGE_USER | _PAGE_ACCESSED > + beq+ cr0, 5f /* branch if access allowed */ > + rlwinm r10, r10, 0, 22, 19 /* r10 &= ~(_PAGE_ACCESSED | _PAGE_RW) */ > + b 6f > +5: xori r10, r10, _PAGE_RW /* invert RW bit */ > +6: > /* The Linux PTE won't go exactly into the MMU TLB. > - * Software indicator bits 21, 22 and 28 must be clear. > + * Software indicator bit 28 must be clear. > * Software indicator bits 24, 25, 26, and 27 must be > * set. All other Linux PTE bits control the behavior > * of the MMU. > @@ -448,6 +436,91 @@ DataStoreTLBMiss: > */ > . = 0x1300 > InstructionTLBError: > +#ifdef CONFIG_8xx_CPU6 > + stw r3, 8(r0) > +#endif > + DO_8xx_CPU6(0x3f80, r3) > + mtspr SPRN_M_TW, r10 /* Save a couple of working registers */ > + mfcr r10 > + stw r10, 0(r0) > + stw r11, 4(r0) > + > + mfspr r11, SPRN_SRR1 > + andis. r11, r11, 0x5000 /* no translation, guarded */ > + bne 2f > + > + mfspr r10, SPRN_SRR0 /* Get effective address of fault */ > +#ifdef CONFIG_8xx_CPU15 > + addi r11, r10, 0x1000 > + tlbie r11 > + addi r11, r10, -0x1000 > + tlbie r11 > +#endif > + DO_8xx_CPU6(0x3780, r3) > + mtspr SPRN_MD_EPN, r10 /* Have to use MD_EPN for walk, MI_EPN can't */ > + mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */ > + > + /* If we are faulting a kernel address, we have to use the > + * kernel page tables. > + */ > + andi. r11, r10, 0x0800 /* Address >= 0x80000000 */ > + beq 3f > + lis r11, swapper_pg_dir@h > + ori r11, r11, swapper_pg_dir@l > + rlwimi r10, r11, 0, 2, 19 > +3: > + lwz r11, 0(r10) /* Get the level 1 entry */ > + rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */ > + beq 2f /* If zero, don't try to find a pte */ > + > + /* We have a pte table, so load the MI_TWC with the attributes > + * for this "segment." 
> + */ > + ori r11,r11,1 /* Set valid bit */ > + DO_8xx_CPU6(0x2b80, r3) > + mtspr SPRN_MI_TWC, r11 /* Set segment attributes */ > + DO_8xx_CPU6(0x3b80, r3) > + mtspr SPRN_MD_TWC, r11 /* Load pte table base address */ > + > + mfspr r11, SPRN_SRR1 > + andi. r11, r11, 0x4000 /* MSR[PR] */ > + mfspr r11, SPRN_MD_TWC /* ....and get the pte address */ > + lwz r10, 0(r11) /* Get the pte */ > + beq 5f /* Kernel access always OK */ > + andi. r11,r10, _PAGE_USER > + beq 2f > +5: ori r10, r10, _PAGE_ACCESSED > + mfspr r21, SPRN_MD_TWC /* ....and get the pte address */ > + stw r10, 0(r11) > + xori r10, r10, _PAGE_RW /* invert RW bit */ > + > + /* The Linux PTE won't go exactly into the MMU TLB. > + * Software indicator bit 28 must be clear. > + * Software indicator bits 24, 25, 26, and 27 must be > + * set. All other Linux PTE bits control the behavior > + * of the MMU. > + */ > + li r11, 0x00f0 > + rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ > + DO_8xx_CPU6(0x2d80, r3) > + mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ > + > + mfspr r10, SPRN_M_TW /* Restore registers */ > + lwz r11, 0(r0) > + mtcr r11 > + lwz r11, 4(r0) > +#ifdef CONFIG_8xx_CPU6 > + lwz r3, 8(r0) > +#endif > + rfi > + > +2: mfspr r10, SPRN_M_TW /* Restore registers */ > + lwz r11, 0(r0) > + mtcr r11 > + lwz r11, 4(r0) > +#ifdef CONFIG_8xx_CPU6 > + lwz r3, 8(r0) > +#endif > b InstructionAccess > > /* This is the data TLB error on the MPC8xx. This could be due to > @@ -472,8 +545,8 @@ DataTLBError: > /* First, make sure this was a store operation. > */ > mfspr r10, SPRN_DSISR > - andis. r11, r10, 0x4800 /* no translation, no permission. */ > - bne 2f /* branch if either is set */ > + andis. r11, r10, 0x4000 /* no translation */ > + bne 2f /* branch if set */ > > /* The EA of a data TLB miss is automatically stored in the MD_EPN > * register. 
The EA of a data TLB error is automatically stored in > @@ -522,26 +595,26 @@ DataTLBError: > mfspr r11, SPRN_MD_TWC /* ....and get the pte address */ > lwz r10, 0(r11) /* Get the pte */ > > - andi. r11, r10, _PAGE_RW /* Is it writeable? */ > - beq 2f /* Bail out if not */ > + mfspr r11, SPRN_SRR1 > + andi. r11, r11, 0x4000 /* MSR[PR] */ > + beq 5f /* Kernel access always OK */ > + andi. r11,r10, _PAGE_USER > + beq 2f > +5: mfspr r11, SPRN_DSISR > + andis. r11, r11, 0x0200 /* store */ > + beq 6f > + andi. r11, r10, _PAGE_RW /* writeable? */ > + beq 2f /* branch if not */ > + ori r10, r10, _PAGE_DIRTY | _PAGE_HWWRITE > +6: ori r10, r10, _PAGE_ACCESSED > > - /* Update 'changed', among others. > - */ > -#ifdef CONFIG_SWAP > - ori r10, r10, _PAGE_DIRTY|_PAGE_HWWRITE > - /* do not set the _PAGE_ACCESSED bit of a non-present page */ > - andi. r11, r10, _PAGE_PRESENT > - beq 4f > - ori r10, r10, _PAGE_ACCESSED > -4: > -#else > - ori r10, r10, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE > -#endif > mfspr r11, SPRN_MD_TWC /* Get pte address again */ > stw r10, 0(r11) /* and update pte in table */ > > + xori r10, r10, _PAGE_RW /* Invert RW bit */ > + > /* The Linux PTE won't go exactly into the MMU TLB. > - * Software indicator bits 21, 22 and 28 must be clear. > + * Software indicator bit 28 must be clear. > * Software indicator bits 24, 25, 26, and 27 must be > * set. All other Linux PTE bits control the behavior > * of the MMU.
Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote on 05/10/2009 22:17:04: > > On Mon, 2009-10-05 at 14:16 +0200, Joakim Tjernlund wrote: > > Update the TLB asm to make proper use of _PAGE_DIRTY and _PAGE_ACCESSED. > > Pros: > > - I/D TLB Miss never needs to write to the linux pte. > > - _PAGE_ACCESSED is only set on I/D TLB Error fixing accounting > > - _PAGE_DIRTY is mapped to 0x100, the changed bit, and is set directly > > when a page has been made dirty. > > Not sure here. You seem to still set those from asm. Is that necessary ? Well, yes. head_32.S also sets ACCESSED and DIRTY from asm so why not me? The basic method I use is: TLB gets mapped with NoAccess, then the first access will trap into TLB error, where ACCESSED will be set if permission to access the page is OK (and RW too). On first store 8xx will trap into DTLB error and permissions is OK, DIRTY will be set too. Is this wrong? Do I have to trap to C for first store? > > The approach I take on BookE is to simply not set these from asm, -and- > (this is important) not set write permission if dirty is not set in > the TLB miss and set no access permission at all when accessed is not > set. This is important or we'll miss dirty updates which can > be fatal. not sure, but this seems similar to what I do. DIRTY will be set, in asm, on first store. ACCESSED will only be set iff (USER && VALID) > > The C code in handle_pte_fault() will fixup missing access and dirty > if necessary and flush. > > Also look at the 440 code, I think you could simplify your > implementation using similar techniques, such as andc of PTE against > requested bits etc... and thus maybe save a couple of insns. Great, but first I want to make sure I'm doing it right :) So is there some golden rule I have to follow? Jocke > > Cheers, > Ben. > > > - Proper RO/RW mapping of user space. > > - Free up 2 SW TLB bits in the linux pte(add back _PAGE_WRITETHRU ?) 
> > Cons: > > - 4 more instructions in I/D TLB Miss, but the since the linux pte is > > not written anymore, it should still be a win. > > --- > > arch/powerpc/include/asm/pte-8xx.h | 9 +- > > arch/powerpc/kernel/head_8xx.S | 163 ++++++++++++++++++++++++++---------- > > 2 files changed, 122 insertions(+), 50 deletions(-) > > > > diff --git a/arch/powerpc/include/asm/pte-8xx.h b/arch/powerpc/include/asm/pte-8xx.h > > index 8c6e312..af541a2 100644 > > --- a/arch/powerpc/include/asm/pte-8xx.h > > +++ b/arch/powerpc/include/asm/pte-8xx.h > > @@ -32,22 +32,21 @@ > > #define _PAGE_FILE 0x0002 /* when !present: nonlinear file mapping */ > > #define _PAGE_NO_CACHE 0x0002 /* I: cache inhibit */ > > #define _PAGE_SHARED 0x0004 /* No ASID (context) compare */ > > +#define _PAGE_DIRTY 0x0100 /* C: page changed */ > > > > /* These five software bits must be masked out when the entry is loaded > > * into the TLB. > > */ > > #define _PAGE_EXEC 0x0008 /* software: i-cache coherency required */ > > #define _PAGE_GUARDED 0x0010 /* software: guarded access */ > > -#define _PAGE_DIRTY 0x0020 /* software: page changed */ > > -#define _PAGE_RW 0x0040 /* software: user write access allowed */ > > -#define _PAGE_ACCESSED 0x0080 /* software: page referenced */ > > +#define _PAGE_USER 0x0020 /* software: User space access */ > > > > /* Setting any bits in the nibble with the follow two controls will > > * require a TLB exception handler change. It is assumed unused bits > > * are always zero. 
> > */ > > -#define _PAGE_HWWRITE 0x0100 /* h/w write enable: never set in Linux PTE */ > > -#define _PAGE_USER 0x0800 /* One of the PP bits, the other is USER&~RW */ > > +#define _PAGE_RW 0x0400 /* lsb PP bits */ > > +#define _PAGE_ACCESSED 0x0800 /* msb PP bits */ > > > > #define _PMD_PRESENT 0x0001 > > #define _PMD_BAD 0x0ff0 > > diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S > > index 118bb05..b1f72d9 100644 > > --- a/arch/powerpc/kernel/head_8xx.S > > +++ b/arch/powerpc/kernel/head_8xx.S > > @@ -333,21 +333,15 @@ InstructionTLBMiss: > > mfspr r11, SPRN_MD_TWC /* ....and get the pte address */ > > lwz r10, 0(r11) /* Get the pte */ > > > > -#ifdef CONFIG_SWAP > > - /* do not set the _PAGE_ACCESSED bit of a non-present page */ > > - andi. r11, r10, _PAGE_PRESENT > > - beq 4f > > - ori r10, r10, _PAGE_ACCESSED > > - mfspr r11, SPRN_MD_TWC /* get the pte address again */ > > - stw r10, 0(r11) > > -4: > > -#else > > - ori r10, r10, _PAGE_ACCESSED > > - stw r10, 0(r11) > > -#endif > > - > > + andi. r11, r10, _PAGE_USER | _PAGE_ACCESSED > > + cmpwi cr0, r11, _PAGE_USER | _PAGE_ACCESSED > > + beq+ cr0, 5f /* branch if access allowed */ > > + rlwinm r10, r10, 0, 22, 19 /* r10 &= ~(_PAGE_ACCESSED | _PAGE_RW) */ > > + b 6f > > +5: xori r10, r10, _PAGE_RW /* invert RW bit */ > > +6: > > /* The Linux PTE won't go exactly into the MMU TLB. > > - * Software indicator bits 21, 22 and 28 must be clear. > > + * Software indicator bit 28 must be clear. > > * Software indicator bits 24, 25, 26, and 27 must be > > * set. All other Linux PTE bits control the behavior > > * of the MMU. > > @@ -409,21 +403,15 @@ DataStoreTLBMiss: > > DO_8xx_CPU6(0x3b80, r3) > > mtspr SPRN_MD_TWC, r11 > > > > -#ifdef CONFIG_SWAP > > - /* do not set the _PAGE_ACCESSED bit of a non-present page */ > > - andi. 
r11, r10, _PAGE_PRESENT > > - beq 4f > > - ori r10, r10, _PAGE_ACCESSED > > -4: > > - /* and update pte in table */ > > -#else > > - ori r10, r10, _PAGE_ACCESSED > > -#endif > > - mfspr r11, SPRN_MD_TWC /* get the pte address again */ > > - stw r10, 0(r11) > > - > > + andi. r11, r10, _PAGE_USER | _PAGE_ACCESSED > > + cmpwi cr0, r11, _PAGE_USER | _PAGE_ACCESSED > > + beq+ cr0, 5f /* branch if access allowed */ > > + rlwinm r10, r10, 0, 22, 19 /* r10 &= ~(_PAGE_ACCESSED | _PAGE_RW) */ > > + b 6f > > +5: xori r10, r10, _PAGE_RW /* invert RW bit */ > > +6: > > /* The Linux PTE won't go exactly into the MMU TLB. > > - * Software indicator bits 21, 22 and 28 must be clear. > > + * Software indicator bit 28 must be clear. > > * Software indicator bits 24, 25, 26, and 27 must be > > * set. All other Linux PTE bits control the behavior > > * of the MMU. > > @@ -448,6 +436,91 @@ DataStoreTLBMiss: > > */ > > . = 0x1300 > > InstructionTLBError: > > +#ifdef CONFIG_8xx_CPU6 > > + stw r3, 8(r0) > > +#endif > > + DO_8xx_CPU6(0x3f80, r3) > > + mtspr SPRN_M_TW, r10 /* Save a couple of working registers */ > > + mfcr r10 > > + stw r10, 0(r0) > > + stw r11, 4(r0) > > + > > + mfspr r11, SPRN_SRR1 > > + andis. r11, r11, 0x5000 /* no translation, guarded */ > > + bne 2f > > + > > + mfspr r10, SPRN_SRR0 /* Get effective address of fault */ > > +#ifdef CONFIG_8xx_CPU15 > > + addi r11, r10, 0x1000 > > + tlbie r11 > > + addi r11, r10, -0x1000 > > + tlbie r11 > > +#endif > > + DO_8xx_CPU6(0x3780, r3) > > + mtspr SPRN_MD_EPN, r10 /* Have to use MD_EPN for walk, MI_EPN can't */ > > + mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */ > > + > > + /* If we are faulting a kernel address, we have to use the > > + * kernel page tables. > > + */ > > + andi. r11, r10, 0x0800 /* Address >= 0x80000000 */ > > + beq 3f > > + lis r11, swapper_pg_dir@h > > + ori r11, r11, swapper_pg_dir@l > > + rlwimi r10, r11, 0, 2, 19 > > +3: > > + lwz r11, 0(r10) /* Get the level 1 entry */ > > + rlwinm. 
r10, r11,0,0,19 /* Extract page descriptor page address */ > > + beq 2f /* If zero, don't try to find a pte */ > > + > > + /* We have a pte table, so load the MI_TWC with the attributes > > + * for this "segment." > > + */ > > + ori r11,r11,1 /* Set valid bit */ > > + DO_8xx_CPU6(0x2b80, r3) > > + mtspr SPRN_MI_TWC, r11 /* Set segment attributes */ > > + DO_8xx_CPU6(0x3b80, r3) > > + mtspr SPRN_MD_TWC, r11 /* Load pte table base address */ > > + > > + mfspr r11, SPRN_SRR1 > > + andi. r11, r11, 0x4000 /* MSR[PR] */ > > + mfspr r11, SPRN_MD_TWC /* ....and get the pte address */ > > + lwz r10, 0(r11) /* Get the pte */ > > + beq 5f /* Kernel access always OK */ > > + andi. r11,r10, _PAGE_USER > > + beq 2f > > +5: ori r10, r10, _PAGE_ACCESSED > > + mfspr r21, SPRN_MD_TWC /* ....and get the pte address */ > > + stw r10, 0(r11) > > + xori r10, r10, _PAGE_RW /* invert RW bit */ > > + > > + /* The Linux PTE won't go exactly into the MMU TLB. > > + * Software indicator bit 28 must be clear. > > + * Software indicator bits 24, 25, 26, and 27 must be > > + * set. All other Linux PTE bits control the behavior > > + * of the MMU. > > + */ > > + li r11, 0x00f0 > > + rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ > > + DO_8xx_CPU6(0x2d80, r3) > > + mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ > > + > > + mfspr r10, SPRN_M_TW /* Restore registers */ > > + lwz r11, 0(r0) > > + mtcr r11 > > + lwz r11, 4(r0) > > +#ifdef CONFIG_8xx_CPU6 > > + lwz r3, 8(r0) > > +#endif > > + rfi > > + > > +2: mfspr r10, SPRN_M_TW /* Restore registers */ > > + lwz r11, 0(r0) > > + mtcr r11 > > + lwz r11, 4(r0) > > +#ifdef CONFIG_8xx_CPU6 > > + lwz r3, 8(r0) > > +#endif > > b InstructionAccess > > > > /* This is the data TLB error on the MPC8xx. This could be due to > > @@ -472,8 +545,8 @@ DataTLBError: > > /* First, make sure this was a store operation. > > */ > > mfspr r10, SPRN_DSISR > > - andis. r11, r10, 0x4800 /* no translation, no permission. 
*/ > > - bne 2f /* branch if either is set */ > > + andis. r11, r10, 0x4000 /* no translation */ > > + bne 2f /* branch if set */ > > > > /* The EA of a data TLB miss is automatically stored in the MD_EPN > > * register. The EA of a data TLB error is automatically stored in > > @@ -522,26 +595,26 @@ DataTLBError: > > mfspr r11, SPRN_MD_TWC /* ....and get the pte address */ > > lwz r10, 0(r11) /* Get the pte */ > > > > - andi. r11, r10, _PAGE_RW /* Is it writeable? */ > > - beq 2f /* Bail out if not */ > > + mfspr r11, SPRN_SRR1 > > + andi. r11, r11, 0x4000 /* MSR[PR] */ > > + beq 5f /* Kernel access always OK */ > > + andi. r11,r10, _PAGE_USER > > + beq 2f > > +5: mfspr r11, SPRN_DSISR > > + andis. r11, r11, 0x0200 /* store */ > > + beq 6f > > + andi. r11, r10, _PAGE_RW /* writeable? */ > > + beq 2f /* branch if not */ > > + ori r10, r10, _PAGE_DIRTY | _PAGE_HWWRITE > > +6: ori r10, r10, _PAGE_ACCESSED > > > > - /* Update 'changed', among others. > > - */ > > -#ifdef CONFIG_SWAP > > - ori r10, r10, _PAGE_DIRTY|_PAGE_HWWRITE > > - /* do not set the _PAGE_ACCESSED bit of a non-present page */ > > - andi. r11, r10, _PAGE_PRESENT > > - beq 4f > > - ori r10, r10, _PAGE_ACCESSED > > -4: > > -#else > > - ori r10, r10, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE > > -#endif > > mfspr r11, SPRN_MD_TWC /* Get pte address again */ > > stw r10, 0(r11) /* and update pte in table */ > > > > + xori r10, r10, _PAGE_RW /* Invert RW bit */ > > + > > /* The Linux PTE won't go exactly into the MMU TLB. > > - * Software indicator bits 21, 22 and 28 must be clear. > > + * Software indicator bit 28 must be clear. > > * Software indicator bits 24, 25, 26, and 27 must be > > * set. All other Linux PTE bits control the behavior > > * of the MMU. > > > >
On Mon, 2009-10-05 at 23:25 +0200, Joakim Tjernlund wrote: > > Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote on 05/10/2009 22:17:04: > > > > On Mon, 2009-10-05 at 14:16 +0200, Joakim Tjernlund wrote: > > > Update the TLB asm to make proper use of _PAGE_DIRTY and _PAGE_ACCESSED. > > > Pros: > > > - I/D TLB Miss never needs to write to the linux pte. > > > - _PAGE_ACCESSED is only set on I/D TLB Error fixing accounting > > > - _PAGE_DIRTY is mapped to 0x100, the changed bit, and is set directly > > > when a page has been made dirty. > > > > Not sure here. You seem to still set those from asm. Is that necessary ? > > Well, yes. head_32.S also sets ACCESSED and DIRTY from asm so why not me? Don't look at the hash 32 code :-) > The basic method I use is: > TLB gets mapped with NoAccess, then the first access will trap > into TLB error, where ACCESSED will be set if permission to access > the page is OK (and RW too). On first store 8xx will trap > into DTLB error and permissions is OK, DIRTY will be set too. > Is this wrong? From your explanation it looks ok. IE. as long as !DIRTY -> not writeable (will fault on store) and !ACCESSED means not accessible (will fault on any access) then you are ok. > Do I have to trap to C for first store? Most of the time you do anyways since the PTE isn't populated at all. At which point, Linux will populate a PTE with DIRTY and ACCESSED already set. It should be reasonably rare to actually fault because DIRTY and/or ACCESSED are missing. > > The approach I take on BookE is to simply not set these from asm, -and- > > (this is important) not set write permission if dirty is not set in > > the TLB miss and set no access permission at all when accessed is not > > set. This is important or we'll miss dirty updates which can > > be fatal. > > not sure, but this seems similar to what I do. DIRTY will be set, > in asm, on first store. 
Don't set DIRTY if !VALID > ACCESSED will only be set iff (USER && VALID) My point is that you should be able to simplify the code here, have only the TLB miss look at the PTE, not the data access and instruction access, and have the later be a boring exception going straight to C. > > > > The C code in handle_pte_fault() will fixup missing access and dirty > > if necessary and flush. > > > > Also look at the 440 code, I think you could simplify your > > implementation using similar techniques, such as andc of PTE against > > requested bits etc... and thus maybe save a couple of insns. > > Great, but first I want to make sure I doing it right :) > > So is there some golden rule I have to follow? Mostly only !ACCESSED -> no access permitted and !DIRTY -> no store permitted and don't write anything back if !VALID. Cheers, Ben. > Jocke > > > > > Cheers, > > Ben. > > > > > - Proper RO/RW mapping of user space. > > > - Free up 2 SW TLB bits in the linux pte(add back _PAGE_WRITETHRU ?) > > > Cons: > > > - 4 more instructions in I/D TLB Miss, but the since the linux pte is > > > not written anymore, it should still be a win. > > > --- > > > arch/powerpc/include/asm/pte-8xx.h | 9 +- > > > arch/powerpc/kernel/head_8xx.S | 163 ++++++++++++++++++++++++++---------- > > > 2 files changed, 122 insertions(+), 50 deletions(-) > > > > > > diff --git a/arch/powerpc/include/asm/pte-8xx.h b/arch/powerpc/include/asm/pte-8xx.h > > > index 8c6e312..af541a2 100644 > > > --- a/arch/powerpc/include/asm/pte-8xx.h > > > +++ b/arch/powerpc/include/asm/pte-8xx.h > > > @@ -32,22 +32,21 @@ > > > #define _PAGE_FILE 0x0002 /* when !present: nonlinear file mapping */ > > > #define _PAGE_NO_CACHE 0x0002 /* I: cache inhibit */ > > > #define _PAGE_SHARED 0x0004 /* No ASID (context) compare */ > > > +#define _PAGE_DIRTY 0x0100 /* C: page changed */ > > > > > > /* These five software bits must be masked out when the entry is loaded > > > * into the TLB. 
> > > */ > > > #define _PAGE_EXEC 0x0008 /* software: i-cache coherency required */ > > > #define _PAGE_GUARDED 0x0010 /* software: guarded access */ > > > -#define _PAGE_DIRTY 0x0020 /* software: page changed */ > > > -#define _PAGE_RW 0x0040 /* software: user write access allowed */ > > > -#define _PAGE_ACCESSED 0x0080 /* software: page referenced */ > > > +#define _PAGE_USER 0x0020 /* software: User space access */ > > > > > > /* Setting any bits in the nibble with the follow two controls will > > > * require a TLB exception handler change. It is assumed unused bits > > > * are always zero. > > > */ > > > -#define _PAGE_HWWRITE 0x0100 /* h/w write enable: never set in Linux PTE */ > > > -#define _PAGE_USER 0x0800 /* One of the PP bits, the other is USER&~RW */ > > > +#define _PAGE_RW 0x0400 /* lsb PP bits */ > > > +#define _PAGE_ACCESSED 0x0800 /* msb PP bits */ > > > > > > #define _PMD_PRESENT 0x0001 > > > #define _PMD_BAD 0x0ff0 > > > diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S > > > index 118bb05..b1f72d9 100644 > > > --- a/arch/powerpc/kernel/head_8xx.S > > > +++ b/arch/powerpc/kernel/head_8xx.S > > > @@ -333,21 +333,15 @@ InstructionTLBMiss: > > > mfspr r11, SPRN_MD_TWC /* ....and get the pte address */ > > > lwz r10, 0(r11) /* Get the pte */ > > > > > > -#ifdef CONFIG_SWAP > > > - /* do not set the _PAGE_ACCESSED bit of a non-present page */ > > > - andi. r11, r10, _PAGE_PRESENT > > > - beq 4f > > > - ori r10, r10, _PAGE_ACCESSED > > > - mfspr r11, SPRN_MD_TWC /* get the pte address again */ > > > - stw r10, 0(r11) > > > -4: > > > -#else > > > - ori r10, r10, _PAGE_ACCESSED > > > - stw r10, 0(r11) > > > -#endif > > > - > > > + andi. 
r11, r10, _PAGE_USER | _PAGE_ACCESSED > > > + cmpwi cr0, r11, _PAGE_USER | _PAGE_ACCESSED > > > + beq+ cr0, 5f /* branch if access allowed */ > > > + rlwinm r10, r10, 0, 22, 19 /* r10 &= ~(_PAGE_ACCESSED | _PAGE_RW) */ > > > + b 6f > > > +5: xori r10, r10, _PAGE_RW /* invert RW bit */ > > > +6: > > > /* The Linux PTE won't go exactly into the MMU TLB. > > > - * Software indicator bits 21, 22 and 28 must be clear. > > > + * Software indicator bit 28 must be clear. > > > * Software indicator bits 24, 25, 26, and 27 must be > > > * set. All other Linux PTE bits control the behavior > > > * of the MMU. > > > @@ -409,21 +403,15 @@ DataStoreTLBMiss: > > > DO_8xx_CPU6(0x3b80, r3) > > > mtspr SPRN_MD_TWC, r11 > > > > > > -#ifdef CONFIG_SWAP > > > - /* do not set the _PAGE_ACCESSED bit of a non-present page */ > > > - andi. r11, r10, _PAGE_PRESENT > > > - beq 4f > > > - ori r10, r10, _PAGE_ACCESSED > > > -4: > > > - /* and update pte in table */ > > > -#else > > > - ori r10, r10, _PAGE_ACCESSED > > > -#endif > > > - mfspr r11, SPRN_MD_TWC /* get the pte address again */ > > > - stw r10, 0(r11) > > > - > > > + andi. r11, r10, _PAGE_USER | _PAGE_ACCESSED > > > + cmpwi cr0, r11, _PAGE_USER | _PAGE_ACCESSED > > > + beq+ cr0, 5f /* branch if access allowed */ > > > + rlwinm r10, r10, 0, 22, 19 /* r10 &= ~(_PAGE_ACCESSED | _PAGE_RW) */ > > > + b 6f > > > +5: xori r10, r10, _PAGE_RW /* invert RW bit */ > > > +6: > > > /* The Linux PTE won't go exactly into the MMU TLB. > > > - * Software indicator bits 21, 22 and 28 must be clear. > > > + * Software indicator bit 28 must be clear. > > > * Software indicator bits 24, 25, 26, and 27 must be > > > * set. All other Linux PTE bits control the behavior > > > * of the MMU. > > > @@ -448,6 +436,91 @@ DataStoreTLBMiss: > > > */ > > > . 
= 0x1300 > > > InstructionTLBError: > > > +#ifdef CONFIG_8xx_CPU6 > > > + stw r3, 8(r0) > > > +#endif > > > + DO_8xx_CPU6(0x3f80, r3) > > > + mtspr SPRN_M_TW, r10 /* Save a couple of working registers */ > > > + mfcr r10 > > > + stw r10, 0(r0) > > > + stw r11, 4(r0) > > > + > > > + mfspr r11, SPRN_SRR1 > > > + andis. r11, r11, 0x5000 /* no translation, guarded */ > > > + bne 2f > > > + > > > + mfspr r10, SPRN_SRR0 /* Get effective address of fault */ > > > +#ifdef CONFIG_8xx_CPU15 > > > + addi r11, r10, 0x1000 > > > + tlbie r11 > > > + addi r11, r10, -0x1000 > > > + tlbie r11 > > > +#endif > > > + DO_8xx_CPU6(0x3780, r3) > > > + mtspr SPRN_MD_EPN, r10 /* Have to use MD_EPN for walk, MI_EPN can't */ > > > + mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */ > > > + > > > + /* If we are faulting a kernel address, we have to use the > > > + * kernel page tables. > > > + */ > > > + andi. r11, r10, 0x0800 /* Address >= 0x80000000 */ > > > + beq 3f > > > + lis r11, swapper_pg_dir@h > > > + ori r11, r11, swapper_pg_dir@l > > > + rlwimi r10, r11, 0, 2, 19 > > > +3: > > > + lwz r11, 0(r10) /* Get the level 1 entry */ > > > + rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */ > > > + beq 2f /* If zero, don't try to find a pte */ > > > + > > > + /* We have a pte table, so load the MI_TWC with the attributes > > > + * for this "segment." > > > + */ > > > + ori r11,r11,1 /* Set valid bit */ > > > + DO_8xx_CPU6(0x2b80, r3) > > > + mtspr SPRN_MI_TWC, r11 /* Set segment attributes */ > > > + DO_8xx_CPU6(0x3b80, r3) > > > + mtspr SPRN_MD_TWC, r11 /* Load pte table base address */ > > > + > > > + mfspr r11, SPRN_SRR1 > > > + andi. r11, r11, 0x4000 /* MSR[PR] */ > > > + mfspr r11, SPRN_MD_TWC /* ....and get the pte address */ > > > + lwz r10, 0(r11) /* Get the pte */ > > > + beq 5f /* Kernel access always OK */ > > > + andi. 
r11,r10, _PAGE_USER > > > + beq 2f > > > +5: ori r10, r10, _PAGE_ACCESSED > > > + mfspr r21, SPRN_MD_TWC /* ....and get the pte address */ > > > + stw r10, 0(r11) > > > + xori r10, r10, _PAGE_RW /* invert RW bit */ > > > + > > > + /* The Linux PTE won't go exactly into the MMU TLB. > > > + * Software indicator bit 28 must be clear. > > > + * Software indicator bits 24, 25, 26, and 27 must be > > > + * set. All other Linux PTE bits control the behavior > > > + * of the MMU. > > > + */ > > > + li r11, 0x00f0 > > > + rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ > > > + DO_8xx_CPU6(0x2d80, r3) > > > + mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ > > > + > > > + mfspr r10, SPRN_M_TW /* Restore registers */ > > > + lwz r11, 0(r0) > > > + mtcr r11 > > > + lwz r11, 4(r0) > > > +#ifdef CONFIG_8xx_CPU6 > > > + lwz r3, 8(r0) > > > +#endif > > > + rfi > > > + > > > +2: mfspr r10, SPRN_M_TW /* Restore registers */ > > > + lwz r11, 0(r0) > > > + mtcr r11 > > > + lwz r11, 4(r0) > > > +#ifdef CONFIG_8xx_CPU6 > > > + lwz r3, 8(r0) > > > +#endif > > > b InstructionAccess > > > > > > /* This is the data TLB error on the MPC8xx. This could be due to > > > @@ -472,8 +545,8 @@ DataTLBError: > > > /* First, make sure this was a store operation. > > > */ > > > mfspr r10, SPRN_DSISR > > > - andis. r11, r10, 0x4800 /* no translation, no permission. */ > > > - bne 2f /* branch if either is set */ > > > + andis. r11, r10, 0x4000 /* no translation */ > > > + bne 2f /* branch if set */ > > > > > > /* The EA of a data TLB miss is automatically stored in the MD_EPN > > > * register. The EA of a data TLB error is automatically stored in > > > @@ -522,26 +595,26 @@ DataTLBError: > > > mfspr r11, SPRN_MD_TWC /* ....and get the pte address */ > > > lwz r10, 0(r11) /* Get the pte */ > > > > > > - andi. r11, r10, _PAGE_RW /* Is it writeable? */ > > > - beq 2f /* Bail out if not */ > > > + mfspr r11, SPRN_SRR1 > > > + andi. 
r11, r11, 0x4000 /* MSR[PR] */ > > > + beq 5f /* Kernel access always OK */ > > > + andi. r11,r10, _PAGE_USER > > > + beq 2f > > > +5: mfspr r11, SPRN_DSISR > > > + andis. r11, r11, 0x0200 /* store */ > > > + beq 6f > > > + andi. r11, r10, _PAGE_RW /* writeable? */ > > > + beq 2f /* branch if not */ > > > + ori r10, r10, _PAGE_DIRTY | _PAGE_HWWRITE > > > +6: ori r10, r10, _PAGE_ACCESSED > > > > > > - /* Update 'changed', among others. > > > - */ > > > -#ifdef CONFIG_SWAP > > > - ori r10, r10, _PAGE_DIRTY|_PAGE_HWWRITE > > > - /* do not set the _PAGE_ACCESSED bit of a non-present page */ > > > - andi. r11, r10, _PAGE_PRESENT > > > - beq 4f > > > - ori r10, r10, _PAGE_ACCESSED > > > -4: > > > -#else > > > - ori r10, r10, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE > > > -#endif > > > mfspr r11, SPRN_MD_TWC /* Get pte address again */ > > > stw r10, 0(r11) /* and update pte in table */ > > > > > > + xori r10, r10, _PAGE_RW /* Invert RW bit */ > > > + > > > /* The Linux PTE won't go exactly into the MMU TLB. > > > - * Software indicator bits 21, 22 and 28 must be clear. > > > + * Software indicator bit 28 must be clear. > > > * Software indicator bits 24, 25, 26, and 27 must be > > > * set. All other Linux PTE bits control the behavior > > > * of the MMU. > > > > > > > >
Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote on 05/10/2009 23:37:23: > > On Mon, 2009-10-05 at 23:25 +0200, Joakim Tjernlund wrote: > > > > Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote on 05/10/2009 22:17:04: > > > > > > On Mon, 2009-10-05 at 14:16 +0200, Joakim Tjernlund wrote: > > > > Update the TLB asm to make proper use of _PAGE_DIRY and _PAGE_ACCESSED. > > > > Pros: > > > > - I/D TLB Miss never needs to write to the linux pte. > > > > - _PAGE_ACCESSED is only set on I/D TLB Error fixing accounting > > > > - _PAGE_DIRTY is mapped to 0x100, the changed bit, and is set directly > > > > when a page has been made dirty. > > > > > > Not sure here. You seem to still set those from asm. Is that necessary ? > > > > Well, yes. head_32.S also sets ACCESSED and DIRTY from asm so why not me? > > Don't look at the hash 32 code :-) So what to look at then? :) > > > The basic method I use is: > > TLB gets mapped with NoAccess, then the first access will trap > > into TLB error, where ACCESSED will be set if permission to access > > the page is OK (and RW too). On first store 8xx will trap > > into DTLB error and permissions is OK, DIRTY will be set too. > > Is this wrong? > > >From your explanation it looks ok. IE. as long as !DIRTY -> not > writeable (will fault on store) and !ACCESSED means not accessible (will > fault on any access) then you are ok. Yes, that is what I have (tried) to do. > > > Do I have to trap to C for first store? > > Most of the time you do anyways since the PTE isn't populated at all. At > which point, Linux will populate a PTE with DIRTY and ACCESSED already > set. It should be reasonably rare to actually fault because DIRTY and/or > ACCESSED are missing. I tried to unconditionally trap to C in DTLB error but it just hung if I did that so the asm has to do something. 
> > > > The approach I take on BookE is to simply not set these from asm, -and- > > > (this is important) not set write permission if dirty is not set in Did you really mean "if dirty is not" ? I test RW for write permission. Dirty is just set when the first write happens after the permission check. > > > the TLB miss and set no access permission at all when accessed is not > > > set. This is important or we'll miss dirty updates which can > > > be fatal. > > > > not sure, but this seems similar to what I do. DIRTY will be set, > > in asm, on first store. > > Don't set DIRTY if !VALID I don't. !VALID trap to C > > > ACCESSED will only be set iff (USER && VALID) > > My point is that you should be able to simplify the code here, have only > the TLB miss look at the PTE, not the data access and instruction > access, and have the later be a boring exception going straight to C. I do this, TLB Miss only looks at TLB and then transforms it into a HW pte. The HW pte do not map 1:1 so I need to some magic to fit the linux pte into a HW pte. > > > > > > > The C code in handle_pte_fault() will fixup missing access and dirty > > > if necessary and flush. > > > > > > Also look at the 440 code, I think you could simplify your > > > implementation using similar techniques, such as andc of PTE against > > > requested bits etc... and thus maybe save a couple of insns. > > > > Great, but first I want to make sure I doing it right :) > > > > So is there some golden rule I have to follow? > > Mostly only !ACCESSED -> no access permitted and !DIRTY -> no store > permitted and don't write anything back if !VALID. That should be !RW and not !DIRTY I hope? Then trap first store and set DIRTY (without trapping to C) > > Cheers, > Ben. > > > Jocke > > > > > > > > Cheers, > > > Ben. > > > > > > > - Proper RO/RW mapping of user space. > > > > - Free up 2 SW TLB bits in the linux pte(add back _PAGE_WRITETHRU ?) 
> > > > Cons: > > > > - 4 more instructions in I/D TLB Miss, but the since the linux pte is > > > > not written anymore, it should still be a win.
On Tue, 2009-10-06 at 00:00 +0200, Joakim Tjernlund wrote: > Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote on 05/10/2009 23:37:23: > > > > On Mon, 2009-10-05 at 23:25 +0200, Joakim Tjernlund wrote: > > > > > > Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote on 05/10/2009 22:17:04: > > > > > > > > On Mon, 2009-10-05 at 14:16 +0200, Joakim Tjernlund wrote: > > > > > Update the TLB asm to make proper use of _PAGE_DIRY and _PAGE_ACCESSED. > > > > > Pros: > > > > > - I/D TLB Miss never needs to write to the linux pte. > > > > > - _PAGE_ACCESSED is only set on I/D TLB Error fixing accounting > > > > > - _PAGE_DIRTY is mapped to 0x100, the changed bit, and is set directly > > > > > when a page has been made dirty. > > > > > > > > Not sure here. You seem to still set those from asm. Is that necessary ? > > > > > > Well, yes. head_32.S also sets ACCESSED and DIRTY from asm so why not me? > > > > Don't look at the hash 32 code :-) > > So what to look at then? :) head_440.S on a recent 2.6 > > Most of the time you do anyways since the PTE isn't populated at all. At > > which point, Linux will populate a PTE with DIRTY and ACCESSED already > > set. It should be reasonably rare to actually fault because DIRTY and/or > > ACCESSED are missing. > > I tried to unconditionally trap to C in DTLB error but it just hung if I did > that so the asm has to do something. Sure, the question is what and how can we get away without it ? :-) > > > > The approach I take on BookE is to simply not set these from asm, -and- > > > > (this is important) not set write permission if dirty is not set in > > Did you really mean "if dirty is not" ? I test RW for write permission. > Dirty is just set when the first write happens after the permission check. Sure but if dirty is cleared by the kernel, then you also need to remove write permission in the TLB or it will miss setting dirty on the next store to the page. 
So dirty basically acts as a filter on RW, and accessed as a filter on valid if you want. > > Mostly only !ACCESSED -> no access permitted and !DIRTY -> no store > > permitted and don't write anything back if !VALID. > > That should be !RW and not !DIRTY I hope? Then trap > first store and set DIRTY (without trapping to C) No it's really !(RW _AND_ DIRTY) -> no store permitted, and !(PRESENT _AND_ ACCESSED) -> no access permitted. Cheers, Ben. > > > > Cheers, > > Ben. > > > > > Jocke > > > > > > > > > > > Cheers, > > > > Ben. > > > > > > > > > - Proper RO/RW mapping of user space. > > > > > - Free up 2 SW TLB bits in the linux pte(add back _PAGE_WRITETHRU ?) > > > > > Cons: > > > > > - 4 more instructions in I/D TLB Miss, but the since the linux pte is > > > > > not written anymore, it should still be a win.
Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote on 06/10/2009 00:09:16: > > On Tue, 2009-10-06 at 00:00 +0200, Joakim Tjernlund wrote: > > Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote on 05/10/2009 23:37:23: > > > > > > On Mon, 2009-10-05 at 23:25 +0200, Joakim Tjernlund wrote: > > > > > > > > Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote on 05/10/2009 22:17:04: > > > > > > > > > > On Mon, 2009-10-05 at 14:16 +0200, Joakim Tjernlund wrote: > > > > > > Update the TLB asm to make proper use of _PAGE_DIRY and _PAGE_ACCESSED. > > > > > > Pros: > > > > > > - I/D TLB Miss never needs to write to the linux pte. > > > > > > - _PAGE_ACCESSED is only set on I/D TLB Error fixing accounting > > > > > > - _PAGE_DIRTY is mapped to 0x100, the changed bit, and is set directly > > > > > > when a page has been made dirty. > > > > > > > > > > Not sure here. You seem to still set those from asm. Is that necessary ? > > > > > > > > Well, yes. head_32.S also sets ACCESSED and DIRTY from asm so why not me? > > > > > > Don't look at the hash 32 code :-) > > > > So what to look at then? :) > > head_440.S on a recent 2.6 OK, will look > > > > Most of the time you do anyways since the PTE isn't populated at all. At > > > which point, Linux will populate a PTE with DIRTY and ACCESSED already > > > set. It should be reasonably rare to actually fault because DIRTY and/or > > > ACCESSED are missing. > > > > I tried to unconditionally trap to C in DTLB error but it just hung if I did > > that so the asm has to do something. > > Sure, the question is what and how can we get away without it ? :-) You tell me :) > > > > > > The approach I take on BookE is to simply not set these from asm, -and- > > > > > (this is important) not set write permission if dirty is not set in > > > > Did you really mean "if dirty is not" ? I test RW for write permission. > > Dirty is just set when the first write happens after the permission check. 
> > Sure but if dirty is cleared by the kernel, then you also need to remove > write permission in the TLB or it will miss setting dirty on the next > store to the page. Dirty map to HW changed bit so if kernel clears it, the next store will trap to DTLB error. There I will check RW and clear it again without trapping to C. Is that OK? Not if I read you correctly below .. > > So dirty basically acts as a filter on RW, and accessed as a filter on > valid if you want. > > > > Mostly only !ACCESSED -> no access permitted and !DIRTY -> no store > > permitted and don't write anything back if !VALID. > > > > That should be !RW and not !DIRTY I hope? Then trap > > first store and set DIRTY (without trapping to C) > > No it's really !(RW _AND_ DIRTY) -> no store permitted, and .. hmm, I don't do that. I should do a if (store && !(pte & (RW | DIRTY))) goto DSI in DTLB error? > !(PRESENT _AND_ ACCESSED) -> no access permitted. Yes, how about those pinned kernel ptes. Do they have ACCESSED from start? > > Cheers, > Ben. > > > > > > > Cheers, > > > Ben. > > > > > > > Jocke > > > > > > > > > > > > > > Cheers, > > > > > Ben. > > > > > > > > > > > - Proper RO/RW mapping of user space. > > > > > > - Free up 2 SW TLB bits in the linux pte(add back _PAGE_WRITETHRU ?) > > > > > > Cons: > > > > > > - 4 more instructions in I/D TLB Miss, but the since the linux pte is > > > > > > not written anymore, it should still be a win. > > > >
On Tue, 2009-10-06 at 00:55 +0200, Joakim Tjernlund wrote: > > Sure but if dirty is cleared by the kernel, then you also need to > remove > > write permission in the TLB or it will miss setting dirty on the > next > > store to the page. > > Dirty map to HW changed bit so if kernel clears it, the next > store will trap to DTLB error. There I will check RW and clear it > again > without trapping to C. Is that OK? Not if I read you correcly below .. Well, if the HW has the ability to enforce trap when store with !DIRTY, then that's fine, just map it that way, but you shouldn't have to handle it in the DTLB error neither, the kernel will fix it up for you in handle_pte_fault(). I think I need to get myself an 8xx manual and figure out what that damn MMU actually does :-) > No it's really !(RW _AND_ DIRTY) -> no store permitted, and > > .. hmm, I don't do that. I should do a > if ( store && !(pte & (RW | DIRTY)) > goto DSI > in DTLB error? Well, if the HW does the test of DIRTY, then it's fine, as you seem to suggest above. _something_ needs to do it, either SW or HW, ie, we need to catch any attempt to store to a non-dirty page to set dirty. > > !(PRESENT _AND_ ACCESSED) -> no access permitted. > > Yes, how about those pinned kernel ptes. Do they have ACCESSED from > start? They should. Cheers, Ben.
Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote on 06/10/2009 01:15:39: > > On Tue, 2009-10-06 at 00:55 +0200, Joakim Tjernlund wrote: > > > Sure but if dirty is cleared by the kernel, then you also need to > > remove > > > write permission in the TLB or it will miss setting dirty on the > > next > > > store to the page. > > > > Dirty map to HW changed bit so if kernel clears it, the next > > store will trap to DTLB error. There I will check RW and clear it > > again > > without trapping to C. Is that OK? Not if I read you correcly below .. > > Well, if the HW has the ability to enforce trap when store with !DIRTY, Yes, provided that the kernel invalidates the TLB too so the next access will provoke a TLB Miss, which will then provoke a TLB error. The TLB error routine checks VALID, RW and USER(if not a kernel access), then sets ACCESSED & DIRTY and writes the TLB(RPN reg). Perhaps the missing invalidate is haunting us here? > then that's fine, just map it that way, but you shouldn't have to handle > it in the DTLB error neither, the kernel will fix it up for you in > handle_pte_fault(). Does not all ppc have the Changed bit? > > I think I need to get myself an 8xx manual and figure out what that damn > MMU actually does :-) Please do, get the mpc862 users manual :) > > > No it's really !(RW _AND_ DIRTY) -> no store permitted, and > > > > .. hmm, I don't do that. I should do a > > if ( store && !(pte & (RW | DIRTY)) > > goto DSI > > in DTLB error? > > Well, if the HW does the test of DIRTY, then it's fine, as you seem to > suggest above. _something_ needs to do it, either SW or HW, ie, we need > to catch any attempt to store to a non-dirty page to set dirty. Yes, I think I do this. > > > > !(PRESENT _AND_ ACCESSED) -> no access permitted. > > > > Yes, how about those pinned kernel ptes. Do they have ACCESSED from > > start? > > They should. > > Cheers, > Ben. > > > >
On Tue, 2009-10-06 at 01:35 +0200, Joakim Tjernlund wrote: > > > Well, if the HW has the ability to enforce trap when store with ! > DIRTY, > > Yes, provided that the kernel invalidates the TLB too so the next > access > will provoke a TLB Miss, which will then provoke a TLB error. The TLB > error routine checks VALID, RW and USER(if not a kernel access), then > sets > ACCESSED & DIRTY and writes the TLB(RPN reg). > > Perhaps the missing invalidate is haunting us here? No, the kernel will invalidate when clearing dirty or accessed, I don't think that's our problem. This is still all inefficient, we end up basically with two traps. 8xx provides backup GPRs when doing TLB misses ? What does it cost to jump out of a TLB miss back into "normal" context ? IE. What I do on 440 is I set a mask of required bits, basically _PAGE_PRESENT | _PAGE_ACCESSED is the base. The DTLB miss also sticks in _PAGE_RW | _PAGE_DIRTY when it's a store fault. Then, I andc. the PTE value off that mask, and if the result is non-0 (which means one of the required bits is missing), I get out of the TLB miss immediately and go to the data (or instruction) access interrupt. Once you've done that, you should be able to have data and instruction access go straight to C. Missing _PAGE_ACCESSED and _PAGE_DIRTY are going to be fixed up by generic code. > > then that's fine, just map it that way, but you shouldn't have to > handle > > it in the DTLB error neither, the kernel will fix it up for you in > > handle_pte_fault(). > > Does not all ppc have the Changed bit? No. BookE doesn't. > Please do, get the mpc862 users manual :) Ok :-) Cheers, Ben.
Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote on 06/10/2009 02:34:15: > > On Tue, 2009-10-06 at 01:35 +0200, Joakim Tjernlund wrote: > > > > > Well, if the HW has the ability to enforce trap when store with ! > > DIRTY, > > > > Yes, provided that the kernel invalidates the TLB too so the next > > access > > will provoke a TLB Miss, which will then provoke a TLB error. The TLB > > error routine checks VALID, RW and USER(if not a kernel access), then > > sets > > ACCESSED & DIRTY and writes the TLB(RPN reg). > > > > Perhaps the missing invalidate is haunting us here? > > No, the kernel will invalidate when clearing dirty or accessed, I don't > think that's our problem. > > This is still all inefficient, we end up basically with two traps. Yes, but once the 2 traps is over, it gets much cheaper plus I don't get a choice, see below. > > 8xx provides backup GPRs when doing TLB misses ? What does it cost to > jump out of a TLB miss back into "normal" context ? Nope, there is just one TLB scratch register. I have been meaning to ask you about SPRG2, it seems unused? There is a leftover from 2.4 that inits G2 to something but the it appears unused otherwise. > > IE. What I do on 440 is I set a mask of required bits, basically > _PAGE_PRESENT | _PAGE_ACCESSED is the base. The DTLB miss also sticks > in _PAGE_RW | _PAGE_DIRTY when it's a store fault. Yes, I would too but TLB Miss knows nothing about load/store, protection etc. because DSISR isn't set. So I cannot see any other way than the TLB Error way. > > Then, I andc. the PTE value off that mask, and if the result is non-0 > (which means one of the required bits is missing), I get out of the TLB > miss immediately and go to the data (or instruction) access interrupt. > > Once you've done that, you should be able to have data and instruction > access go straight to C. Missing _PAGE_ACCESSED and _PAGE_DIRTY are > going to be fixed up by generic code. 
> > > > then that's fine, just map it that way, but you shouldn't have to > > handle > > > it in the DTLB error neither, the kernel will fix it up for you in > > > handle_pte_fault(). > > > > Does not all ppc have the Changed bit? > > No. BookE doesn't. But I guess BookE knows if it is a load or store in TLB Miss? Then it can emulate changed bit I guess.
On Tue, 2009-10-06 at 08:15 +0200, Joakim Tjernlund wrote: > Yes, I would too but TLB Miss knows nothing about load/store, protection etc. > because DSISR isn't set. So I cannot see any other way than the TLB Error way. Hrm... that MMU really sucks more than I thought :-( I'll go read the manual and think about that a bit more. > But I guess BookE knows if it is a load or store in TLB Miss? > Then it can emulate changed bit I guess. Yes, it knows. Cheers, Ben.
Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote on 06/10/2009 08:45:47: > > On Tue, 2009-10-06 at 08:15 +0200, Joakim Tjernlund wrote: > > > Yes, I would too but TLB Miss knows nothing about load/store, protection etc. > > because DSISR isn't set. So I cannot see any other way than the TLB Error way. > > Hrm... that MMU really sucks more than I thought :-( Yeah, but at least it got a Changed bit as opposed to some other arch mentioned in this mail thread :) > > I'll go read the manual and think about that a bit more. Thanks I wonder if not much of the problems Scott and Rex are seeing are from dcbX insn and that my fixup isn't quite working yet. I am tempted to get the asm version working again as then I don't have to worry about permission and get_user() et al. :)
Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote on 06/10/2009 08:45:47: > > On Tue, 2009-10-06 at 08:15 +0200, Joakim Tjernlund wrote: > > > Yes, I would too but TLB Miss knows nothing about load/store, protection etc. > > because DSISR isn't set. So I cannot see any other way than the TLB Error way. > > Hrm... that MMU really sucks more than I thought :-( > > I'll go read the manual and think about that a bit more. I realise now that for an ITLB Miss I can work out everything I need. An ITLB must be a read and everything else is in the linux pte. Just check present and user, add accessed if not already there. I blame that on too little sleep :) I don't think I will dwell on that ATM. Jocke
> > Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote on 06/10/2009 08:45:47: > > > > On Tue, 2009-10-06 at 08:15 +0200, Joakim Tjernlund wrote: > > > > > Yes, I would too but TLB Miss knows nothing about load/store, protection etc. > > > because DSISR isn't set. So I cannot see any other way than the TLB Error way. > > > > Hrm... that MMU really sucks more than I thought :-( > > > > I'll go read the manual and think about that a bit more. > > I realise now that for an ITLB Miss I can work out everything I need. > an ITLB must be a read en everything else is in the linux pte. > Just check present and user, add accessed if not already there. > I blame that on too little sleep :) bugger, more bugs I think. Need to fix the TLB code some more
Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote on 06/10/2009 02:34:15: > > On Tue, 2009-10-06 at 01:35 +0200, Joakim Tjernlund wrote: > > > > > Well, if the HW has the ability to enforce trap when store with ! > > DIRTY, > > > > Yes, provided that the kernel invalidates the TLB too so the next > > access > > will provoke a TLB Miss, which will then provoke a TLB error. The TLB > > error routine checks VALID, RW and USER(if not a kernel access), then > > sets > > ACCESSED & DIRTY and writes the TLB(RPN reg). > > > > Perhaps the missing invalidate is haunting us here? > > No, the kernel will invalidate when clearing dirty or accessed, I don't > think that's our problem. > > This is still all inefficient, we end up basically with two traps. > > 8xx provides backup GPRs when doing TLB misses ? What does it cost to > jump out of a TLB miss back into "normal" context ? > > IE. What I do on 440 is I set a mask of required bits, basically > _PAGE_PRESENT | _PAGE_ACCESSED is the base. The DTLB miss also sticks > in _PAGE_RW | _PAGE_DIRTY when it's a store fault. After some more thinking I don't think I do TLB Miss/Error correctly yet. The problem is ACCESSED. Since I don't know if load or store in TLB Miss I must choose: - Assume load and do what you do above. That will incorrectly set ACCESSED on store ops when mapped as RO(plus whatever more I haven't thought about yet) - Trap to TLB Error and do the above. That will set ACCESSED correctly but won't trap kernel space so these remain what they are. Is anything depending on ACCESSED for kernel pages? if so, what if we set ACCESSED on all kernel pages when mapping them at boot? Will SWAP or some other service(accounting?) not like that? Finally, why do you need to include DIRTY when a store OP? Do you need to do COW before dirtying the page? Seems to work for me to just set DIRTY in TLB Error if RW is set too and not trap to C.
> After some more thinking I don't think I do TLB Miss/Error correctly yet. > The problem is ACCESSED. Since I don't know if load or store in TLB Miss > I must choose: > - Assume load and do what you do above. That will incorrectly > set ACCESSED on store ops when mapped as RO(plus whatever more I haven't thought about yet) > > - Trap to TLB Error and do the above. That will set ACCESSED correctly > but won't trap kernel space so these remain what they are. > Is anything depending on ACCESSED for kernel pages? > if so, what if we set ACCESSED on all kernel pages when mapping them at boot? > Will SWAP or some other service(accounting?) not like that? Kernel pages always have ACCESSED set and never clear it. I think the only solution here is that if -anything- doesn't look right, the TLB miss should create one of those "unpopulated" entries and we need to make sure they are properly invalidated in the subsequent fault path. I'll read the 8xx doco asap to make sure I get it right. > Finally, why do you need to include DIRTY when a store OP? > Do you need to do COW before dirtying the page? > Seems to work for me to just set DIRTY in TLB Error if RW is set too > and not trap to C. You can, but what about a load ? That shouldn't set dirty. So if you set dirty only on stores, then a load will bring in a page without dirty, you need to make sure you get another TLB error when writing to it so you get a chance to set dirty. Ben.
Allright, did a bit of reading of doco and code.. Doco isn't totally clear though. At some stage, it -hints- that in case of a TLB "error" (match on EA/ASID but incorrect protection/valid/changed/...) the offending TLB entry is automatically invalidated. Do you know if that is correct ? I would hope so since we never do anything to remove the "invalid" entries we write to the TLB when hitting non-present PTEs but then, that may also explain some of our problems... Now, a few comments from what I read in the code: - The whole writeback could be avoided in Instruction/Data TLB miss, but for that, you need to make sure that the TLB entry we create has valid set only if -both- present and accessed are set. That would save in the case of CONFIG_SWAP, a store and a read back of TWC I suppose, but you do need to find a way to do that ANDing of ACCESSED and PRESENT. - I think we can get rid of HWWRITE. We can make CHANGED be the current HWWRITE value, I agree with you, which matches the HW changed bit. We need to be a bit careful of how we setup the PP bits tho. At this stage, I see several approaches: * One is to basically "generate" the right PP bits based on a combination of _PAGE_USER and _PAGE_RW. That's the simpler approach but probably uses more code in the TLB miss handler. It would look like that, with MxCTR:PPCS=0 _PAGE_USER _PAGE_RW PP (bits 20..27) 0 0 011C1111 (C is _PAGE_DIRTY) 0 1 000C1111 1 0 110C1111 1 1 100C1111 One easy way to do that is to have _PAGE_USER and _PAGE_RW sit next to each other in bit position 28 and 29 (0xc). Load a GPR with something like 0110000011001000 and rotate it left by PTE & 0xc, then move the resulting 3 bits into position, or something along those lines. You can also give up on kernel read-only support and go down to 2 PP bits and never use the extended encoding. 
* Another one is to use MxCTR:PPCS=1 a mask of 100C1U00 (U is _PAGE_USER) and or in ^_PAGE_RW (it could actually be made reverse-polarity in the PTE but that would mean some changes to non-8xx specific headers, so let's avoid it for now at least). At least that's the best options I can see from my reading of the doco, though it's not totally clear to me what really happens when doing the PPCS trick, will it generate a TLB error on a non-match or will it try to TLB miss, which could be bad. * Last but not least, it wouldn't be hard to use either of the above encodings, and have the PTE actually contain the right bit combination already. You don't need to have a _PAGE_RW, you don't need to have a _PAGE_USER :-) Look at how I do things for book3e, where I layout the 6 BookE protection bit directly in the PTE. That's a bit harder and maybe will need subtle changes to pte-common.h and such to accomodate it, and so probably something to experiment with as a second step, but it's the most efficient approach in the long run for obvious reasons. - I think we could have more bits pre-set to the right values in the PTE, look how we play with defining some of the constants in pte-common.h, might be worth having a look, but -after- we have something that works :-) - Possible bug: I'm very disturbed by the fact that DataTLBError sets HWWRITE and DIRTY on a non-present PTE. It should not. Just like ACCESSED. That's going to cause trouble and swap corruption, even more as we move DIRTY around. - Maybe we can completely remove that mucking around with dirty, setting of accessed etc... from DataTLBError. Just make it go to C code just like InstructionAccess, as we discussed earlier, the generic code will fix it up and we'll speed up page faults. It should be fairly rare to take a fault due to a missing _PAGE_ACCESSED or _PAGE_DIRTY in any case, so all that fixup in those exceptions is just overhead. 
- Later on, if it rocks your boat, you may want to look into removing the mucking around with swapper_pg_dir in the TLB miss. Instead, what you can do is lazily copy the kernel PMDs into the user PMDs. IE. On the first kernel access from a new user context, you fault, and from the do_page_fault() code, we can detect that and fixup the user PMD to point to the kernel page tables, thus avoiding that whole bunch of code in the TLB miss. When creating new user page tables, we can pre-fill the kernel PMDs too. I think x86 does that. We could do that for BookE too, though it's less of a win since we have to fixup the TID in MMUCR/MAS but for 8xx it would save a few instructions & conditionals in the TLB miss fast path. - I still have problems with the comment next to the "workaround" tlbil_va() we have in the icache/dcache flush. It doesn't make much sense to me since dcbst is going to be done by the kernel on a kernel address, not a user address, so I don't see how the thing we invalidate relates to the flush we do just below.... So that raises a few questions: * If this is to avoid write faults on kernel pages, then we may just want to have a fixup in do_page_fault() to ignore them when coming from flush_dcache_icache_* using the exception table mechanism instead ? * I don't see how it works around user faults but maybe you have a clear scenario in mind * It appears still reasonably obvious that we need that tlbil_va somewhere in that fault path, but I don't know what for, I'm not sure it's really what the comment says it's about. That raises back the whole question of when those "invalid" TLB entries that we create when _PAGE_PRESENT is not set are going to be removed. The doc hints that they go away on TLB error, but the doc really only says so for missing changed bit... so I'm tempted to say we need to basically stick a tlbil_va on 8xx on both set_pte_at() and ptep_set_access_flags() for any non-kernel page. What do you think ? That would be the safest. 
- The "fake" DataAccess and InstructionAccess are useless... just a spurious jump for nothing. We could implement those straight in DataTLBError and InstructionTLBError, we just need to make sure that the trap numbers we put in the exception frame are 0x300 and 0x400, but that's just a matter of passing the right bits to EXC_XFER_EE_LITE(). - Finally, another boat-rocking optimisation to do is preload. You could hook in update_mmu_cache() for 8xx to go whack an entry in the TLB as well, which would avoid a TLB miss right after a page fault. But again, only when things work. None of the above of course address your DAR-not-updated concern. I think your approach of "clobbering" the DAR on every storage exception after it's been snapshotted and then testing for the clobber and emulating the instruction is the way to go, though your patch could use some cleaning there. I'll post comments on it separately. At the end of the day, I can't help that much without HW access and/or a simulator, as you can guess, and no, I'm not asking for people to send me an 8xx board :-) (especially useless without a BDI imho). So I rely on Scott and yourself to sort this out. But thanks for showing up, and please, do port your board to 2.6 so you can test your stuff :-) Cheers, Ben.
Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote on 07/10/2009 03:07:35: > > Allright, did a bit of reading of doco and code.. hey, this is a super, thanks! > > Doco isn't totally clear though. At some stage, it -hints- that in case > of a TLB "error" (match on EA/ASID but incorrect > protection/valid/changed/...) the offending TLB entry is automatically > invalidated. Do you know if that is correct ? Nope, I don't know. > > I would hope so since we never do anything to remove the "invalid" > entries we write to the TLB when hitting non-present PTEs but then, that > may also explain some of our problems... hmm, then a tlbil_va() in do_page_fault for "no translation" errors would help I guess. > > Now, a few comments from what I read in the code: > > - The whole writeback could be avoided in Instruction/Data TLB miss, > but for that, you need to make sure that the TLB entry we create has > valid set only if -both- present and accessed are set. That would save > in the case of CONFIG_SWAP, a store and a read back of TWC I suppose, > but you do need to find a way to do that ANDing of ACCESSED and PRESENT. So far I have mapped !ACCESSED pages as No Access instead, maybe that was wrong? I thought it was easier but I will have a look at this too. > > - I think we can get rid of HWWRITE. We can make CHANGED be the current > HWWRITE value, I agree with you, which matches the HW changed bit. We > need to be a bit careful of how we setup the PP bits tho. At this stage, > I see several approaches: > > * One is to basically "generate" the right PP bits based on a > combination of _PAGE_USER and _PAGE_RW. That's the simpler approach but > probably uses more code in the TLB miss handler. It would look like > that, with MxCTR:PPCS=0 > > _PAGE_USER _PAGE_RW PP (bits 20..27) > 0 0 011C1111 (C is _PAGE_DIRTY) > 0 1 000C1111 > 1 0 110C1111 > 1 1 100C1111 > > One easy way to do that is to have _PAGE_USER and _PAGE_RW sit next to > each other in bit position 28 and 29 (0xc). 
Load a GPR with something > like 0110000011001000 and rotate it left by PTE & 0xc, then move the > resulting 3 bits into position, or something along those lines. You can > also give up on kernel read-only support and go down to 2 PP bits and > never use the extended encoding. yes, I do think the extended encoding is too much work and not worth it. One concern I have is if a user RO mapping can also be a kernel RO mapping? That is, does the kernel require RW access to a user page mapped RO? > > * Another one is to use MxCTR:PPCS=1 a mask of 100C1U00 (U is > _PAGE_USER) and or in ^_PAGE_RW (it could actually be made > reverse-polarity in the PTE but that would mean some changes to non-8xx > specific headers, so let's avoid it for now at least). hmm, interesting. Then I will also have to set ACCESSED manually in TLB Miss I think. > > At least those are the best options I can see from my reading of the doco, > though it's not totally clear to me what really happens when doing the > PPCS trick, will it generate a TLB error on a non-match or will it try > to TLB miss, which could be bad. Yes, I don't think anybody has tested this. > > * Last but not least, it wouldn't be hard to use either of the above > encodings, and have the PTE actually contain the right bit combination > already. You don't need to have a _PAGE_RW, you don't need to have a > _PAGE_USER :-) Look at how I do things for book3e, where I layout the > 6 BookE protection bit directly in the PTE. That's a bit harder and maybe > will need subtle changes to pte-common.h and such to accommodate it, and > so probably something to experiment with as a second step, but it's the > most efficient approach in the long run for obvious reasons. 
> > - I think we could have more bits pre-set to the right values in the > PTE, look how we play with defining some of the constants in > pte-common.h, might be worth having a look, but -after- we have > something that works :-) > > - Possible bug: I'm very disturbed by the fact that DataTLBError sets > HWWRITE and DIRTY on a non-present PTE. It should not. Just like > ACCESSED. That's going to cause trouble and swap corruption, even more > as we move DIRTY around. Yes, this is what I fix with the first patch in my series: 8xx: DTLB Error must check for more errors. DataTLBError currently does: if ((err & 0x02000000) == 0) DSI(); This won't handle a store with no valid translation. Change this to if ((err & 0x48000000) != 0) DSI(); that is, branch to DSI if either !permission or !translation. > > - Maybe we can completely remove that mucking around with dirty, > setting of accessed etc... from DataTLBError. Just make it go to C code > just like InstructionAccess, as we discussed earlier, the generic code > will fix it up and we'll speed up page faults. It should be fairly rare > to take a fault due to a missing _PAGE_ACCESSED or _PAGE_DIRTY in any > case, so all that fixup in those exceptions is just overhead. That depends on whether you set ACCESSED in TLB Miss or not I think. If TLB Miss maps pages as NoAccess due to missing ACCESSED then the first access is always going to trap to TLB Error and then to C just to fix DIRTY and/or ACCESSED, feels a bit more expensive fixing it in TLB error, no? BTW, does 2.4 update ACCESSED and DIRTY in generic code the same way as 2.6? It is still much easier for me to test things in 2.4 and then move it over to 2.6 > > - Later on, if it rocks your boat, you may want to look into removing > the mucking around with swapper_pg_dir in the TLB miss. Instead, what > you can do is lazily copy the kernel PMDs into the user PMDs. IE. 
On the > first kernel access from a new user context, you fault, and from the > do_page_fault() code, we can detect that and fixup the user PMD to point > to the kernel page tables, thus avoiding that whole bunch of code in the > TLB miss. When creating new user page tables, we can pre-fill the kernel > PMDs too. I think x86 does that. We could do that for BookE too, though > it's less of a win since we have to fixup the TID in MMUCR/MAS but for > 8xx it would save a few instructions & conditionals in the TLB miss fast > path. That would be nice, I have had that idea but no clue as how to do it. > > - I still have problems with the comment next to the "workaround" > tlbil_va() we have in the icache/dcache flush. It doesn't make much > sense to me since dcbst is going to be done by the kernel on a kernel > address, not a user address, so I don't see how the thing we invalidate > relates to the flush we do just below.... So that raises a few > questions: This workaround was added when we started to force TLB Errors in TLB Miss for missing pmd entries so it may be some other bug in the TLB code that makes this happen. Here is a link: http://www.mail-archive.com/linuxppc-embedded@ozlabs.org/msg07885.html Seems to boil down to a missing tlbie() > > * If this is to avoid write faults on kernel pages, then we may just > want to have a fixup in do_page_fault() to ignore them when coming from > flush_dcache_icache_* using the exception table mechanism instead ? > > * I don't see how it works around user faults but maybe you have a > clear scenario in mind > > * It appears still reasonably obvious that we need that tlbil_va > somewhere in that fault path, but I don't know what for, I'm not sure > it's really what the comment says it's about. That raises back the whole > question of when are those "invalid" TLB entries that we create when > _PAGE_PRESENT is not set are going to be removed. 
The doc hints that > they go away on TLB error, but the doc really only says so for missing > changed bit... so I'm tempted to say we need to basically stick a > tlbil_va on 8xx on both set_pte_at() and ptep_set_access_flags() for any > non-kernel page. What do you think ? That would be the safest. Possibly just doing a tlbil_va in do_page_fault() for no translation errors comes to mind. > > - The "fake" DataAccess and InstructionAccess are useless... just a > spurious jump for nothing. We could implement those straight in > DataTLBError and InstructionTLBError, we just need to make sure that > the trap numbers we put in the exception frame are 0x300 and 0x400, but > that's just a matter of passing the right bits to EXC_XFER_EE_LITE(). > > - Finally, another boat-rocking optimisation to do is preload. You > could hook in update_mmu_cache() for 8xx to go whack an entry in the TLB > as well, which would avoid a TLB miss right after a page fault. But > again, only when things work. > > None of the above of course address your DAR-not-updated concern. I > think your approach of "clobbering" the DAR on every storage exception > after it's been snapshotted and then testing for the clobber and > emulating the instruction is the way to go, though your patch could use > some cleaning there. Yes, it is full of debug cruft now. > > I'll post comments on it separately. Thanks, I still like the asm version for two reasons: - It contains the problem to 8xx files - fixing it up in do_page_fault() may lead to a never-ending loop. This is mainly due to the current TLB code not using DIRTY correctly. In any case it would be nice to keep the asm version in the file, should we need it some day. It was rather painful to get it working so saving it somewhere would be nice. > > At the end of the day, I can't help that much without HW access and/or a > simulator, as you can guess, and no, I'm not asking for people to send > me an 8xx board :-) (especially useless without a BDI imho). 
So I rely > on Scott and yourself to sort this out. But thanks for showing up, and > please, do port your board to 2.6 so you can test your stuff :-) :)
diff --git a/arch/powerpc/include/asm/pte-8xx.h b/arch/powerpc/include/asm/pte-8xx.h index 8c6e312..af541a2 100644 --- a/arch/powerpc/include/asm/pte-8xx.h +++ b/arch/powerpc/include/asm/pte-8xx.h @@ -32,22 +32,21 @@ #define _PAGE_FILE 0x0002 /* when !present: nonlinear file mapping */ #define _PAGE_NO_CACHE 0x0002 /* I: cache inhibit */ #define _PAGE_SHARED 0x0004 /* No ASID (context) compare */ +#define _PAGE_DIRTY 0x0100 /* C: page changed */ /* These five software bits must be masked out when the entry is loaded * into the TLB. */ #define _PAGE_EXEC 0x0008 /* software: i-cache coherency required */ #define _PAGE_GUARDED 0x0010 /* software: guarded access */ -#define _PAGE_DIRTY 0x0020 /* software: page changed */ -#define _PAGE_RW 0x0040 /* software: user write access allowed */ -#define _PAGE_ACCESSED 0x0080 /* software: page referenced */ +#define _PAGE_USER 0x0020 /* software: User space access */ /* Setting any bits in the nibble with the follow two controls will * require a TLB exception handler change. It is assumed unused bits * are always zero. */ -#define _PAGE_HWWRITE 0x0100 /* h/w write enable: never set in Linux PTE */ -#define _PAGE_USER 0x0800 /* One of the PP bits, the other is USER&~RW */ +#define _PAGE_RW 0x0400 /* lsb PP bits */ +#define _PAGE_ACCESSED 0x0800 /* msb PP bits */ #define _PMD_PRESENT 0x0001 #define _PMD_BAD 0x0ff0 diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 118bb05..b1f72d9 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -333,21 +333,15 @@ InstructionTLBMiss: mfspr r11, SPRN_MD_TWC /* ....and get the pte address */ lwz r10, 0(r11) /* Get the pte */ -#ifdef CONFIG_SWAP - /* do not set the _PAGE_ACCESSED bit of a non-present page */ - andi. r11, r10, _PAGE_PRESENT - beq 4f - ori r10, r10, _PAGE_ACCESSED - mfspr r11, SPRN_MD_TWC /* get the pte address again */ - stw r10, 0(r11) -4: -#else - ori r10, r10, _PAGE_ACCESSED - stw r10, 0(r11) -#endif - + andi. 
r11, r10, _PAGE_USER | _PAGE_ACCESSED + cmpwi cr0, r11, _PAGE_USER | _PAGE_ACCESSED + beq+ cr0, 5f /* branch if access allowed */ + rlwinm r10, r10, 0, 22, 19 /* r10 &= ~(_PAGE_ACCESSED | _PAGE_RW) */ + b 6f +5: xori r10, r10, _PAGE_RW /* invert RW bit */ +6: /* The Linux PTE won't go exactly into the MMU TLB. - * Software indicator bits 21, 22 and 28 must be clear. + * Software indicator bit 28 must be clear. * Software indicator bits 24, 25, 26, and 27 must be * set. All other Linux PTE bits control the behavior * of the MMU. @@ -409,21 +403,15 @@ DataStoreTLBMiss: DO_8xx_CPU6(0x3b80, r3) mtspr SPRN_MD_TWC, r11 -#ifdef CONFIG_SWAP - /* do not set the _PAGE_ACCESSED bit of a non-present page */ - andi. r11, r10, _PAGE_PRESENT - beq 4f - ori r10, r10, _PAGE_ACCESSED -4: - /* and update pte in table */ -#else - ori r10, r10, _PAGE_ACCESSED -#endif - mfspr r11, SPRN_MD_TWC /* get the pte address again */ - stw r10, 0(r11) - + andi. r11, r10, _PAGE_USER | _PAGE_ACCESSED + cmpwi cr0, r11, _PAGE_USER | _PAGE_ACCESSED + beq+ cr0, 5f /* branch if access allowed */ + rlwinm r10, r10, 0, 22, 19 /* r10 &= ~(_PAGE_ACCESSED | _PAGE_RW) */ + b 6f +5: xori r10, r10, _PAGE_RW /* invert RW bit */ +6: /* The Linux PTE won't go exactly into the MMU TLB. - * Software indicator bits 21, 22 and 28 must be clear. + * Software indicator bit 28 must be clear. * Software indicator bits 24, 25, 26, and 27 must be * set. All other Linux PTE bits control the behavior * of the MMU. @@ -448,6 +436,91 @@ DataStoreTLBMiss: */ . = 0x1300 InstructionTLBError: +#ifdef CONFIG_8xx_CPU6 + stw r3, 8(r0) +#endif + DO_8xx_CPU6(0x3f80, r3) + mtspr SPRN_M_TW, r10 /* Save a couple of working registers */ + mfcr r10 + stw r10, 0(r0) + stw r11, 4(r0) + + mfspr r11, SPRN_SRR1 + andis. 
r11, r11, 0x5000 /* no translation, guarded */ + bne 2f + + mfspr r10, SPRN_SRR0 /* Get effective address of fault */ +#ifdef CONFIG_8xx_CPU15 + addi r11, r10, 0x1000 + tlbie r11 + addi r11, r10, -0x1000 + tlbie r11 +#endif + DO_8xx_CPU6(0x3780, r3) + mtspr SPRN_MD_EPN, r10 /* Have to use MD_EPN for walk, MI_EPN can't */ + mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */ + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + andi. r11, r10, 0x0800 /* Address >= 0x80000000 */ + beq 3f + lis r11, swapper_pg_dir@h + ori r11, r11, swapper_pg_dir@l + rlwimi r10, r11, 0, 2, 19 +3: + lwz r11, 0(r10) /* Get the level 1 entry */ + rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */ + beq 2f /* If zero, don't try to find a pte */ + + /* We have a pte table, so load the MI_TWC with the attributes + * for this "segment." + */ + ori r11,r11,1 /* Set valid bit */ + DO_8xx_CPU6(0x2b80, r3) + mtspr SPRN_MI_TWC, r11 /* Set segment attributes */ + DO_8xx_CPU6(0x3b80, r3) + mtspr SPRN_MD_TWC, r11 /* Load pte table base address */ + + mfspr r11, SPRN_SRR1 + andi. r11, r11, 0x4000 /* MSR[PR] */ + mfspr r11, SPRN_MD_TWC /* ....and get the pte address */ + lwz r10, 0(r11) /* Get the pte */ + beq 5f /* Kernel access always OK */ + andi. r11,r10, _PAGE_USER + beq 2f +5: ori r10, r10, _PAGE_ACCESSED + mfspr r21, SPRN_MD_TWC /* ....and get the pte address */ + stw r10, 0(r11) + xori r10, r10, _PAGE_RW /* invert RW bit */ + + /* The Linux PTE won't go exactly into the MMU TLB. + * Software indicator bit 28 must be clear. + * Software indicator bits 24, 25, 26, and 27 must be + * set. All other Linux PTE bits control the behavior + * of the MMU. 
+ */ + li r11, 0x00f0 + rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ + DO_8xx_CPU6(0x2d80, r3) + mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ + + mfspr r10, SPRN_M_TW /* Restore registers */ + lwz r11, 0(r0) + mtcr r11 + lwz r11, 4(r0) +#ifdef CONFIG_8xx_CPU6 + lwz r3, 8(r0) +#endif + rfi + +2: mfspr r10, SPRN_M_TW /* Restore registers */ + lwz r11, 0(r0) + mtcr r11 + lwz r11, 4(r0) +#ifdef CONFIG_8xx_CPU6 + lwz r3, 8(r0) +#endif b InstructionAccess /* This is the data TLB error on the MPC8xx. This could be due to @@ -472,8 +545,8 @@ DataTLBError: /* First, make sure this was a store operation. */ mfspr r10, SPRN_DSISR - andis. r11, r10, 0x4800 /* no translation, no permission. */ - bne 2f /* branch if either is set */ + andis. r11, r10, 0x4000 /* no translation */ + bne 2f /* branch if set */ /* The EA of a data TLB miss is automatically stored in the MD_EPN * register. The EA of a data TLB error is automatically stored in @@ -522,26 +595,26 @@ DataTLBError: mfspr r11, SPRN_MD_TWC /* ....and get the pte address */ lwz r10, 0(r11) /* Get the pte */ - andi. r11, r10, _PAGE_RW /* Is it writeable? */ - beq 2f /* Bail out if not */ + mfspr r11, SPRN_SRR1 + andi. r11, r11, 0x4000 /* MSR[PR] */ + beq 5f /* Kernel access always OK */ + andi. r11,r10, _PAGE_USER + beq 2f +5: mfspr r11, SPRN_DSISR + andis. r11, r11, 0x0200 /* store */ + beq 6f + andi. r11, r10, _PAGE_RW /* writeable? */ + beq 2f /* branch if not */ + ori r10, r10, _PAGE_DIRTY | _PAGE_HWWRITE +6: ori r10, r10, _PAGE_ACCESSED - /* Update 'changed', among others. - */ -#ifdef CONFIG_SWAP - ori r10, r10, _PAGE_DIRTY|_PAGE_HWWRITE - /* do not set the _PAGE_ACCESSED bit of a non-present page */ - andi. 
r11, r10, _PAGE_PRESENT - beq 4f - ori r10, r10, _PAGE_ACCESSED -4: -#else - ori r10, r10, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE -#endif mfspr r11, SPRN_MD_TWC /* Get pte address again */ stw r10, 0(r11) /* and update pte in table */ + xori r10, r10, _PAGE_RW /* Invert RW bit */ + /* The Linux PTE won't go exactly into the MMU TLB. - * Software indicator bits 21, 22 and 28 must be clear. + * Software indicator bit 28 must be clear. * Software indicator bits 24, 25, 26, and 27 must be * set. All other Linux PTE bits control the behavior * of the MMU.