===== arch/ppc64/kernel/head.S 1.39 vs edited =====
--- 1.39/arch/ppc64/kernel/head.S	Thu Oct  2 08:41:11 2003
+++ edited/arch/ppc64/kernel/head.S	Fri Dec 12 06:04:53 2003
@@ -601,25 +601,29 @@
  */
 	.globl DataAccess_common
 DataAccess_common:
+BEGIN_FTR_SECTION
 	mfspr	r22,DAR
 	srdi	r22,r22,60
 	cmpi	0,r22,0xc
 
 	/* Segment fault on a bolted segment. Go off and map that segment. */
 	beq-	.do_stab_bolted
+END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
 stab_bolted_user_return:
 	EXCEPTION_PROLOG_COMMON
 	ld	r3,_DSISR(r1)
-	andis.	r0,r3,0xa450		/* weird error? */
+	andis.	r0,r3,0x0450		/* weird error? */
 	bne	1f			/* if not, try to put a PTE */
 	andis.	r0,r3,0x0020		/* Is it a page table fault? */
 	rlwinm	r4,r3,32-23,29,29	/* DSISR_STORE -> _PAGE_RW */
 	ld	r3,_DAR(r1)		/* into the hash table */
 
+BEGIN_FTR_SECTION
 	beq+	2f			/* If so handle it */
 	li	r4,0x300		/* Trap number */
 	bl	.do_stab_SI
 	b	1f
+END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
 
 2:	li	r5,0x300
 	bl	.do_hash_page_DSI	/* Try to handle as hpte fault */
@@ -645,7 +649,7 @@
 	EXCEPTION_PROLOG_COMMON
 	ld	r3,_DAR(r1)
 	li	r4,0x380		/* Exception vector */
-	bl	.ste_allocate
+	bl	.slb_allocate
 	or.	r3,r3,r3		/* Check return code */
 	beq	fast_exception_return	/* Return if we succeeded */
 	addi	r3,r1,STACK_FRAME_OVERHEAD
@@ -660,12 +664,14 @@
 InstructionAccess_common:
 	EXCEPTION_PROLOG_COMMON
 
+BEGIN_FTR_SECTION
 	andis.	r0,r23,0x0020		/* no ste found? */
 	beq+	2f
 	mr	r3,r22			/* SRR0 at interrupt */
 	li	r4,0x400		/* Trap number */
 	bl	.do_stab_SI
 	b	1f
+END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
 
 2:	mr	r3,r22
 	li	r5,0x400
@@ -685,7 +691,7 @@
 	EXCEPTION_PROLOG_COMMON
 	mr	r3,r22			/* SRR0 = NIA */
 	li	r4,0x480		/* Exception vector */
-	bl	.ste_allocate
+	bl	.slb_allocate
 	or.	r3,r3,r3		/* Check return code */
 	beq+	fast_exception_return	/* Return if we succeeded */
 
@@ -944,48 +950,27 @@
  * r20 - r23, SRR0 and SRR1 are saved in the exception frame.
  * We assume we aren't going to take any exceptions during this procedure.
  */
+/* XXX note fix masking in get_kernel_vsid to match */
 _GLOBAL(do_slb_bolted)
-	stw	r23,EX_CCR(r21)	/* save CR in exc. frame */
+	stw	r23,EX_CCR(r21)		/* save CR in exc. frame */
 
-	/* (((ea >> 28) & 0x1fff) << 15) | (ea >> 60) */
-	mfspr	r21,DAR
-	rldicl	r20,r21,36,32	/* Permits a full 32b of ESID */
-	rldicr	r20,r20,15,48
-	rldicl	r21,r21,4,60
-	or	r20,r20,r21
-
-	li	r21,9		/* VSID_RANDOMIZER */
-	sldi	r21,r21,32
-	oris	r21,r21,58231
-	ori	r21,r21,39831
-
-	mulld	r20,r20,r21
-	clrldi	r20,r20,28	/* r20 = vsid */
-
-	/* Search the SLB for a free entry */
-	li	r22,1
-1:
-	slbmfee	r23,r22
-	rldicl	r23,r23,37,63
-	cmpwi	r23,0
-	beq	4f		/* Found an invalid entry */
-
-	addi	r22,r22,1
-	cmpldi	r22,64
-	blt	1b
+	/*
+	 * We take the next entry, round robin. Previously we tried
+	 * to find a free slot first but that took too long. Unfortunately
+	 * we dont have any LRU information to help us choose a slot.
+	 */
 
-	/* No free entry - just take the next entry, round-robin */
-	/* XXX we should get the number of SLB entries from the naca */
+	/* r20 = paca */
+	/* use a cpu feature mask if we ever change our slb size */
 SLB_NUM_ENTRIES = 64
-2:	mfspr	r21,SPRG3
-	ld	r22,PACASTABRR(r21)
-	addi	r23,r22,1
-	cmpdi	r23,SLB_NUM_ENTRIES
-	blt	3f
-	li	r23,1
-3:	std	r23,PACASTABRR(r21)
+1:	ld	r22,PACASTABRR(r20)
+	addi	r21,r22,1
+	cmpdi	r21,SLB_NUM_ENTRIES
+	blt+	2f
+	li	r21,1			/* dont touch bolted slot 0 */
+2:	std	r21,PACASTABRR(r20)
 
-	/* r20 = vsid, r22 = entry */
+	/* r20 = paca, r22 = entry */
 
 	/*
 	 * Never cast out the segment for our kernel stack.  Since we
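The hunk above drops the free-slot search and simply takes the next slot, round robin, keeping the index in PACASTABRR and never handing out the bolted entry 0. A minimal userspace C sketch of that index update (SLB_NUM_ENTRIES and the slot-0 rule are taken from the hunk; the static variable stands in for the per-cpu paca field and everything else is illustrative):

	/* Sketch of the round-robin SLB victim choice: 64 entries, slot 0
	 * bolted and therefore never reused. The kernel keeps the index
	 * per cpu in the paca (PACASTABRR); a static is used here only
	 * to keep the example self-contained.
	 */
	#include <stdio.h>

	#define SLB_NUM_ENTRIES 64

	static unsigned long next_slot = 1;	/* stand-in for the PACASTABRR value */

	static unsigned long pick_victim_slot(void)
	{
		unsigned long entry = next_slot;

		next_slot = entry + 1;
		if (next_slot >= SLB_NUM_ENTRIES)
			next_slot = 1;		/* dont touch bolted slot 0 */

		return entry;
	}

	int main(void)
	{
		/* Walk far enough to show the wrap from 63 back to 1, never 0. */
		for (int i = 0; i < 70; i++)
			printf("cast out slot %lu\n", pick_victim_slot());
		return 0;
	}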
@@ -994,48 +979,87 @@
 	 * which gets invalidated due to a tlbie from another cpu at a
 	 * non recoverable point (after setting srr0/1) - Anton
 	 */
-	slbmfee	r23,r22
-	srdi	r23,r23,28
+	slbmfee	r21,r22
+	insrdi	r21,r21,12,36	/* move valid bit 2^11 to 2^27 */
+	srdi	r21,r21,27
 
 	/*
 	 * This is incorrect (r1 is not the kernel stack) if we entered
 	 * from userspace but there is no critical window from userspace
 	 * so this should be OK. Also if we cast out the userspace stack
 	 * segment while in userspace we will fault it straight back in.
	 */
-	srdi	r21,r1,28
-	cmpd	r21,r23
-	beq-	2b
-
-	/* Put together the vsid portion of the entry. */
-4:	li	r21,0
-	rldimi	r21,r20,12,0
-	ori	r20,r21,1024
-	ori	r20,r20,128	/* set class bit for kernel region */
-#ifndef CONFIG_PPC_ISERIES
-	ori	r20,r20,256	/* map kernel region with large ptes */
-#endif
+	srdi	r23,r1,27
+	ori	r23,r23,1
+	cmpd	r23,r21
+	beq-	1b
+
+	/* r20 = paca, r22 = entry */
+
+	/* (((ea >> 28) & 0x1fff) << 15) | (ea >> 60) */
+	mfspr	r21,DAR
+	rldicl	r23,r21,36,51
+	sldi	r23,r23,15
+	srdi	r21,r21,60
+	or	r23,r23,r21
+
+	/* VSID_RANDOMIZER */
+	li	r21,9
+	sldi	r21,r21,32
+	oris	r21,r21,58231
+	ori	r21,r21,39831
+
+	/* vsid = (ordinal * VSID_RANDOMIZER) & VSID_MASK */
+	mulld	r23,r23,r21
+	clrldi	r23,r23,28
+
+	/* r20 = paca, r22 = entry, r23 = vsid */
+
+	/* Put together slb word1 */
+	sldi	r23,r23,12
+
+BEGIN_FTR_SECTION
+	/* set kp and c bits */
+	ori	r23,r23,0x480
+END_FTR_SECTION_IFCLR(CPU_FTR_16M_PAGE)
+BEGIN_FTR_SECTION
+	/* set kp, l and c bits */
+	ori	r23,r23,0x580
+END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
+
+	/* r20 = paca, r22 = entry, r23 = slb word1 */
 
-	/* Put together the esid portion of the entry. */
-	mfspr	r21,DAR		/* Get the new esid */
-	rldicl	r21,r21,36,28	/* Permits a full 36b of ESID */
-	li	r23,0
-	rldimi	r23,r21,28,0	/* Insert esid */
-	oris	r21,r23,2048	/* valid bit */
-	rldimi	r21,r22,0,52	/* Insert entry */
+	/* Put together slb word0 */
+	mfspr	r21,DAR
+	rldicr	r21,r21,0,35	/* get the new esid */
+	oris	r21,r21,2048	/* set valid bit */
+	rldimi	r21,r22,0,52	/* insert entry */
+
+	/* r20 = paca, r21 = slb word0, r23 = slb word1 */
 
 	/*
 	 * No need for an isync before or after this slbmte. The exception
 	 * we enter with and the rfid we exit with are context synchronizing.
 	 */
-	slbmte	r20,r21
+	slbmte	r23,r21
 
 	/* All done -- return from exception. */
-	mfsprg	r20,3			/* Load the PACA pointer */
-	ld	r21,PACAEXCSP(r20)	/* Get the exception frame pointer */
-	addi	r21,r21,EXC_FRAME_SIZE
+	ld	r21,PACAEXCSP(r20)	/* Get the exception frame pointer */
+	addi	r21,r21,EXC_FRAME_SIZE
 	lwz	r23,EX_CCR(r21)		/* get saved CR */
 	/* note that this is almost identical to maskable_exception_exit */
-	mtcr	r23			/* restore CR */
+
+	/*
+	 * Until everyone updates binutils hardwire the POWER4 optimised
+	 * single field mtcrf
+	 */
+#if 0
+	.machine push
+	.machine "power4"
+	mtcrf	0x80,r23
+	.machine pop
+#else
+	.long 0x7ef80120
+#endif
 
 	mfmsr	r22
 	li	r23, MSR_RI
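The vsid and SLB word assembly above is a handful of shifts, a multiply and two masks. A self-contained C sketch of the same arithmetic, assuming SID_SHIFT is 28, a 36-bit VSID mask, and reading 0x480/0x580 as Kp|C versus Kp|L|C; the constant is the one built by the li/sldi/oris/ori sequence (9<<32 | 58231<<16 | 39831) and the sample address and entry index are made up:

	#include <stdio.h>

	#define SID_SHIFT	28
	#define VSID_RANDOMIZER	0x9e3779b97UL		/* 9<<32 | 58231<<16 | 39831 */
	#define VSID_MASK	0xfffffffffUL		/* low 36 bits, clrldi ...,28 */

	/* (((ea >> 28) & 0x1fff) << 15) | (ea >> 60) */
	static unsigned long ea_to_vsid(unsigned long ea)
	{
		unsigned long ordinal;

		ordinal = (((ea >> SID_SHIFT) & 0x1fff) << 15) | (ea >> 60);
		return (ordinal * VSID_RANDOMIZER) & VSID_MASK;
	}

	/* slb word1: vsid in the upper field, then Kp (plus L for 16M pages) and C */
	static unsigned long slb_word1(unsigned long vsid, int large)
	{
		return (vsid << 12) | (large ? 0x580 : 0x480);
	}

	/* slb word0: top 36 bits of the ea (the esid), valid bit 2^27, entry index low */
	static unsigned long slb_word0(unsigned long ea, unsigned long entry)
	{
		return (ea & ~((1UL << SID_SHIFT) - 1)) | 0x08000000UL | entry;
	}

	int main(void)
	{
		unsigned long ea = 0xc000000000120000UL;	/* example kernel address */
		unsigned long vsid = ea_to_vsid(ea);

		printf("ea %016lx vsid %09lx\n", ea, vsid);
		printf("word0 %016lx word1 %016lx\n",
		       slb_word0(ea, 23), slb_word1(vsid, 1));
		return 0;
	}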
@@ -1045,10 +1069,14 @@
 	ld	r22,EX_SRR0(r21)	/* Get SRR0 from exc. frame */
 	ld	r23,EX_SRR1(r21)	/* Get SRR1 from exc. frame */
 	mtspr	SRR0,r22
-	mtspr	SRR1,r23
+	mtspr	SRR1,r23
 	ld	r22,EX_R22(r21)		/* restore r22 and r23 */
 	ld	r23,EX_R23(r21)
+#if 1
+	ld	r20,EX_R20(r21)
+#else
 	mfspr	r20,SPRG2
+#endif
 	mfspr	r21,SPRG1
 	rfid
===== arch/ppc64/kernel/pacaData.c 1.9 vs edited =====
--- 1.9/arch/ppc64/kernel/pacaData.c	Thu Oct 16 23:15:36 2003
+++ edited/arch/ppc64/kernel/pacaData.c	Fri Dec 12 05:56:08 2003
@@ -41,7 +41,6 @@
 	.xStab_data = {						\
 		.real = (asrr),		/* Real pointer to segment table */	\
 		.virt = (asrv),		/* Virt pointer to segment table */	\
-		.next_round_robin = 1	/* Round robin index */		\
 	},							\
 	.lpQueuePtr = (lpq),		/* &xItLpQueue, */	\
 	/* .xRtas = {						\
===== arch/ppc64/kernel/process.c 1.41 vs edited =====
--- 1.41/arch/ppc64/kernel/process.c	Wed Oct  8 12:53:40 2003
+++ edited/arch/ppc64/kernel/process.c	Fri Dec 12 05:56:09 2003
@@ -109,9 +109,29 @@
 	new_thread = &new->thread;
 	old_thread = &current->thread;
 
+#if 0
+	printk("%d switch %p (%s) -> %p (%s)\n", smp_processor_id(),
+	       prev, prev->comm, new, new->comm);
+#endif
+
 	local_irq_save(flags);
 	last = _switch(old_thread, new_thread);
+
+	if ((cur_cpu_spec->cpu_features & CPU_FTR_SLB) &&
+	    GET_ESID((unsigned long)_get_SP()) != GET_ESID(PAGE_OFFSET)) {
+		union {
+			unsigned long word0;
+			slb_dword0 data;
+		} esid_data;
+
+		esid_data.word0 = 0;
+		/* class bit is in valid field for slbie instruction */
+		esid_data.data.v = 1;
+		esid_data.data.esid = GET_ESID((unsigned long)_get_SP());
+		asm volatile("isync; slbie %0; isync" : : "r" (esid_data));
+	}
 	local_irq_restore(flags);
+
 	return last;
 }
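For reference, a hedged sketch of the slbie operand the new __switch_to code builds, assuming the slb_dword0 layout puts the 36-bit esid in the top bits with the V position (2^27) doubling as the class bit for slbie, and GET_ESID(ea) behaving as ea >> 28; the stack address is made up:

	#include <stdio.h>

	#define SID_SHIFT 28

	static unsigned long slbie_operand(unsigned long ea, int class_bit)
	{
		unsigned long word0 = 0;

		word0 |= (ea >> SID_SHIFT) << SID_SHIFT;	/* esid field */
		if (class_bit)
			word0 |= 1UL << 27;			/* class, in the V position */

		return word0;
	}

	int main(void)
	{
		unsigned long stack = 0xc00000000f2e4000UL;	/* example kernel stack */

		printf("slbie operand for old stack segment: %016lx\n",
		       slbie_operand(stack, 1));
		return 0;
	}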
===== arch/ppc64/kernel/stab.c 1.13 vs edited =====
--- 1.13/arch/ppc64/kernel/stab.c	Sun Sep  7 11:24:09 2003
+++ edited/arch/ppc64/kernel/stab.c	Fri Dec 12 06:21:53 2003
@@ -39,6 +39,9 @@
 	esid = GET_ESID(KERNELBASE);
 	vsid = get_kernel_vsid(esid << SID_SHIFT);
 
+	/* XXX should call this earlier */
+	do_cpu_ftr_fixups(0);
+
 	if (cur_cpu_spec->cpu_features & CPU_FTR_SLB) {
 		/* Invalidate the entire SLB & all the ERATS */
 #ifdef CONFIG_PPC_ISERIES
@@ -60,6 +63,10 @@
 }
 
 /*
+ * Segment table stuff
+ */
+
+/*
  * Create a segment table entry for the given esid/vsid pair.
  */
 int make_ste(unsigned long stab, unsigned long esid, unsigned long vsid)
@@ -140,6 +147,161 @@
 	return (global_entry | (castout_entry & 0x7));
 }
 
+static inline void __ste_allocate(unsigned long esid, unsigned long vsid,
+				  int kernel_segment, mm_context_t context)
+{
+	unsigned char top_entry, stab_entry, *segments;
+
+	stab_entry = make_ste(get_paca()->xStab_data.virt, esid, vsid);
+	PMC_SW_PROCESSOR_A(stab_entry_use, stab_entry & 0xf);
+
+	segments = get_paca()->xSegments;
+	top_entry = get_paca()->stab_cache_pointer;
+	if (!kernel_segment && top_entry < STAB_CACHE_SIZE) {
+		segments[top_entry] = stab_entry;
+		if (top_entry == STAB_CACHE_SIZE)
+			top_entry = 0xff;
+		top_entry++;
+		get_paca()->stab_cache_pointer = top_entry;
+	}
+}
+
+/*
+ * Allocate a segment table entry for the given ea.
+ */
+int ste_allocate(unsigned long ea)
+{
+	unsigned long vsid, esid;
+	int kernel_segment = 0;
+	mm_context_t context;
+
+	PMC_SW_PROCESSOR(stab_faults);
+
+	/* Check for invalid effective addresses. */
+	if (!IS_VALID_EA(ea))
+		return 1;
+
+	/* Kernel or user address? */
+	if (REGION_ID(ea) >= KERNEL_REGION_ID) {
+		kernel_segment = 1;
+		vsid = get_kernel_vsid(ea);
+		context = REGION_ID(ea);
+	} else {
+		if (!current->mm)
+			return 1;
+
+		vsid = get_vsid(context, ea);
+		context = current->mm->context;
+	}
+
+	esid = GET_ESID(ea);
+	__ste_allocate(esid, vsid, kernel_segment, context);
+	/* Order update */
+	asm volatile("sync":::"memory");
+
+	return 0;
+}
+
+/*
+ * preload some userspace segments into the segment table.
+ */
+static void preload_stab(struct task_struct *tsk, struct mm_struct *mm)
+{
+	unsigned long pc = KSTK_EIP(tsk);
+	unsigned long stack = KSTK_ESP(tsk);
+	unsigned long unmapped_base;
+	unsigned long pc_esid = GET_ESID(pc);
+	unsigned long stack_esid = GET_ESID(stack);
+	unsigned long unmapped_base_esid;
+	unsigned long vsid;
+
+	if (test_tsk_thread_flag(tsk, TIF_32BIT))
+		unmapped_base = TASK_UNMAPPED_BASE_USER32;
+	else
+		unmapped_base = TASK_UNMAPPED_BASE_USER64;
+
+	unmapped_base_esid = GET_ESID(unmapped_base);
+
+	if (!IS_VALID_EA(pc) || (REGION_ID(pc) >= KERNEL_REGION_ID))
+		return;
+	vsid = get_vsid(mm->context, pc);
+	__ste_allocate(pc_esid, vsid, 0, mm->context);
+
+	if (pc_esid == stack_esid)
+		return;
+
+	if (!IS_VALID_EA(stack) || (REGION_ID(stack) >= KERNEL_REGION_ID))
+		return;
+	vsid = get_vsid(mm->context, stack);
+	__ste_allocate(stack_esid, vsid, 0, mm->context);
+
+	if (pc_esid == unmapped_base_esid || stack_esid == unmapped_base_esid)
+		return;
+
+	if (!IS_VALID_EA(unmapped_base) ||
+	    (REGION_ID(unmapped_base) >= KERNEL_REGION_ID))
+		return;
+	vsid = get_vsid(mm->context, unmapped_base);
+	__ste_allocate(unmapped_base_esid, vsid, 0, mm->context);
+
+	/* Order update */
+	asm volatile("sync" : : : "memory");
+}
+
+/* Flush all user entries from the segment table of the current processor. */
+void flush_stab(struct task_struct *tsk, struct mm_struct *mm)
+{
+	STE *stab = (STE *) get_paca()->xStab_data.virt;
+	STE *ste;
+	unsigned long flags;
+
+	/* Force previous translations to complete. DRENG */
+	asm volatile("isync" : : : "memory");
+
+	local_irq_save(flags);
+	if (get_paca()->stab_cache_pointer != 0xff) {
+		int i;
+		unsigned char *segments = get_paca()->xSegments;
+
+		for (i = 0; i < get_paca()->stab_cache_pointer; i++) {
+			ste = stab + segments[i];
+			ste->dw0.dw0.v = 0;
+			PMC_SW_PROCESSOR(stab_invalidations);
+		}
+
+		asm volatile("sync; slbia; sync":::"memory");
+	} else {
+		unsigned long entry;
+
+		/* Invalidate all entries. */
+		ste = stab;
+
+		/* Never flush the first entry. */
+		ste += 1;
+		for (entry = 1;
+		     entry < (PAGE_SIZE / sizeof(STE));
+		     entry++, ste++) {
+			unsigned long ea;
+			ea = ste->dw0.dw0.esid << SID_SHIFT;
+			if (STAB_PRESSURE || ea < KERNELBASE) {
+				ste->dw0.dw0.v = 0;
+				PMC_SW_PROCESSOR(stab_invalidations);
+			}
+		}
+
+		asm volatile("sync; slbia; sync":::"memory");
+	}
+
+	get_paca()->stab_cache_pointer = 0;
+	local_irq_restore(flags);
+
+	preload_stab(tsk, mm);
+}
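The STAB path above remembers which user segment table entries were created in a small per-cpu array so flush_stab() can invalidate just those instead of scanning the whole table. A loose, illustrative sketch of that bookkeeping (STAB_CACHE_SIZE and the 0xff overflow marker are assumptions, and the overflow handling here is simplified rather than copied from the hunk):

	#include <stdio.h>

	#define STAB_CACHE_SIZE 16		/* assumed size of xSegments[] */
	#define STAB_CACHE_OVERFLOW 0xff

	static unsigned char segments[STAB_CACHE_SIZE];
	static unsigned char stab_cache_pointer;

	static void remember_user_ste(unsigned char stab_entry)
	{
		if (stab_cache_pointer < STAB_CACHE_SIZE) {
			segments[stab_cache_pointer++] = stab_entry;
			if (stab_cache_pointer == STAB_CACHE_SIZE)
				stab_cache_pointer = STAB_CACHE_OVERFLOW;
		}
	}

	static void flush_user_stes(void)
	{
		if (stab_cache_pointer != STAB_CACHE_OVERFLOW) {
			for (int i = 0; i < stab_cache_pointer; i++)
				printf("invalidate cached STE %u\n", segments[i]);
		} else {
			printf("cache overflowed: scan and invalidate all user STEs\n");
		}
		stab_cache_pointer = 0;
	}

	int main(void)
	{
		for (int i = 0; i < 20; i++)
			remember_user_ste(i);
		flush_user_stes();
		return 0;
	}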
+
+/*
+ * SLB stuff
+ */
+
 /*
  * Create a segment buffer entry for the given esid/vsid pair.
  *
@@ -159,23 +321,16 @@
 		slb_dword1 data;
 	} vsid_data;
 
-	/*
-	 * Find an empty entry, if one exists. Must start at 0 because
-	 * we use this code to load SLB entry 0 at boot.
-	 */
-	for (entry = 0; entry < naca->slb_size; entry++) {
-		asm volatile("slbmfee  %0,%1"
-			     : "=r" (esid_data) : "r" (entry));
-		if (!esid_data.data.v)
-			goto write_entry;
-	}
+#if 0 /* convert to per cpu variable */
+	PMC_SW_PROCESSOR(stab_capacity_castouts);
+#endif
 
 	/*
-	 * Could not find empty entry, pick one with a round robin selection.
+	 * We take the next entry, round robin. Previously we tried
+	 * to find a free slot first but that took too long. Unfortunately
+	 * we dont have any LRU information to help us choose a slot.
 	 */
-	PMC_SW_PROCESSOR(stab_capacity_castouts);
-
 	/*
 	 * Never cast out the segment for our kernel stack. Since we
 	 * dont invalidate the ERAT we could have a valid translation
@@ -190,13 +345,13 @@
 		if (castout_entry >= naca->slb_size)
 			castout_entry = 1;
 		asm volatile("slbmfee  %0,%1" : "=r" (esid_data) : "r" (entry));
-	} while (esid_data.data.esid == GET_ESID((unsigned long)_get_SP()));
+	} while (esid_data.data.v &&
+		 esid_data.data.esid == GET_ESID((unsigned long)_get_SP()));
 
 	get_paca()->xStab_data.next_round_robin = castout_entry;
 
 	/* slbie not needed as the previous mapping is still valid. */
 
-write_entry:
 	/*
 	 * Write the new SLB entry.
 	 */
@@ -220,211 +375,154 @@
 	asm volatile("slbmte  %0,%1" : : "r" (vsid_data), "r" (esid_data));
 }
 
-static inline void __ste_allocate(unsigned long esid, unsigned long vsid,
+#define NR_SLB_CACHE_ENTRIES 8
+DEFINE_PER_CPU(long, slb_cache_ptr);
+DEFINE_PER_CPU(long, slb_cache[NR_SLB_CACHE_ENTRIES]);
+
+static inline void __slb_allocate(unsigned long esid, unsigned long vsid,
 				  int kernel_segment, mm_context_t context)
 {
-	if (cur_cpu_spec->cpu_features & CPU_FTR_SLB) {
-		int large = 0;
+	int large = 0;
+	int region_id = REGION_ID(esid << SID_SHIFT);
+	unsigned long *offset;
 
-#ifndef CONFIG_PPC_ISERIES
-		if (REGION_ID(esid << SID_SHIFT) == KERNEL_REGION_ID)
+	if (cur_cpu_spec->cpu_features & CPU_FTR_16M_PAGE) {
+		if (region_id == KERNEL_REGION_ID)
 			large = 1;
-		else if (REGION_ID(esid << SID_SHIFT) == USER_REGION_ID)
+		else if (region_id == USER_REGION_ID)
 			large = in_hugepage_area(context, esid << SID_SHIFT);
-#endif
-		make_slbe(esid, vsid, large, kernel_segment);
-	} else {
-		unsigned char top_entry, stab_entry, *segments;
+	}
 
-		stab_entry = make_ste(get_paca()->xStab_data.virt, esid, vsid);
-		PMC_SW_PROCESSOR_A(stab_entry_use, stab_entry & 0xf);
+	make_slbe(esid, vsid, large, kernel_segment);
 
-		segments = get_paca()->xSegments;
-		top_entry = get_paca()->stab_cache_pointer;
-		if (!kernel_segment && top_entry < STAB_CACHE_SIZE) {
-			segments[top_entry] = stab_entry;
-			if (top_entry == STAB_CACHE_SIZE)
-				top_entry = 0xff;
-			top_entry++;
-			get_paca()->stab_cache_pointer = top_entry;
-		}
+	if (region_id != USER_REGION_ID)
+		return;
+
+	offset = &__get_cpu_var(slb_cache_ptr);
+	if (*offset < NR_SLB_CACHE_ENTRIES) {
+		__get_cpu_var(slb_cache[*offset]) = esid;
 	}
+	(*offset)++;
 }
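__slb_allocate() above records the user ESIDs it enters so the flush side can later use targeted slbie. A plain C sketch of that recording rule, with the per-cpu variables modelled as statics; note the counter deliberately keeps running past the array so the flush can detect overflow:

	#include <stdio.h>

	#define NR_SLB_CACHE_ENTRIES 8

	static long slb_cache[NR_SLB_CACHE_ENTRIES];
	static long slb_cache_ptr;

	static void record_user_esid(long esid)
	{
		if (slb_cache_ptr < NR_SLB_CACHE_ENTRIES)
			slb_cache[slb_cache_ptr] = esid;
		slb_cache_ptr++;	/* may exceed the array: means "overflowed" */
	}

	int main(void)
	{
		for (long esid = 0; esid < 10; esid++)
			record_user_esid(esid);

		printf("recorded %ld segments (cache holds %d)\n",
		       slb_cache_ptr, NR_SLB_CACHE_ENTRIES);
		return 0;
	}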
 
 /*
  * Allocate a segment table entry for the given ea.
  */
-int ste_allocate(unsigned long ea)
+int slb_allocate(unsigned long ea)
 {
 	unsigned long vsid, esid;
 	int kernel_segment = 0;
 	mm_context_t context;
 
+#if 0 /* convert to per cpu variable */
 	PMC_SW_PROCESSOR(stab_faults);
+#endif
 
 	/* Check for invalid effective addresses. */
-	if (!IS_VALID_EA(ea))
+	if (unlikely(!IS_VALID_EA(ea)))
 		return 1;
 
 	/* Kernel or user address? */
 	if (REGION_ID(ea) >= KERNEL_REGION_ID) {
 		kernel_segment = 1;
-		vsid = get_kernel_vsid(ea);
 		context = REGION_ID(ea);
+		vsid = get_kernel_vsid(ea);
 	} else {
-		if (! current->mm)
+		if (unlikely(!current->mm))
 			return 1;
 
 		context = current->mm->context;
-		vsid = get_vsid(context, ea);
 	}
 
 	esid = GET_ESID(ea);
-	__ste_allocate(esid, vsid, kernel_segment, context);
-	if (!(cur_cpu_spec->cpu_features & CPU_FTR_SLB)) {
-		/* Order update */
-		asm volatile("sync":::"memory");
-	}
+	__slb_allocate(esid, vsid, kernel_segment, context);
 
 	return 0;
 }
 
-unsigned long ppc64_preload_all_segments;
-unsigned long ppc64_stab_preload = 1;
-#define STAB_PRESSURE 0
-#define USE_SLBIE_ON_STAB 0
-
 /*
- * preload all 16 segments for a 32 bit process and the PC and SP segments
- * for a 64 bit process.
+ * preload some userspace segments into the SLB.
  */
-static void preload_stab(struct task_struct *tsk, struct mm_struct *mm)
+static void preload_slb(struct task_struct *tsk, struct mm_struct *mm)
 {
-	if (ppc64_preload_all_segments &&
-	    test_tsk_thread_flag(tsk, TIF_32BIT)) {
-		unsigned long esid, vsid;
-
-		for (esid = 0; esid < 16; esid++) {
-			unsigned long ea = esid << SID_SHIFT;
-			vsid = get_vsid(mm->context, ea);
-			__ste_allocate(esid, vsid, 0, mm->context);
-		}
-	} else {
-		unsigned long pc = KSTK_EIP(tsk);
-		unsigned long stack = KSTK_ESP(tsk);
-		unsigned long pc_segment = pc & ~SID_MASK;
-		unsigned long stack_segment = stack & ~SID_MASK;
-		unsigned long vsid;
-
-		if (pc) {
-			if (!IS_VALID_EA(pc) ||
-			    (REGION_ID(pc) >= KERNEL_REGION_ID))
-				return;
-			vsid = get_vsid(mm->context, pc);
-			__ste_allocate(GET_ESID(pc), vsid, 0, mm->context);
-		}
-
-		if (stack && (pc_segment != stack_segment)) {
-			if (!IS_VALID_EA(stack) ||
-			    (REGION_ID(stack) >= KERNEL_REGION_ID))
-				return;
-			vsid = get_vsid(mm->context, stack);
-			__ste_allocate(GET_ESID(stack), vsid, 0, mm->context);
-		}
-	}
+	unsigned long pc = KSTK_EIP(tsk);
+	unsigned long stack = KSTK_ESP(tsk);
+	unsigned long unmapped_base;
+	unsigned long pc_esid = GET_ESID(pc);
+	unsigned long stack_esid = GET_ESID(stack);
+	unsigned long unmapped_base_esid;
+	unsigned long vsid;
+
+	if (test_tsk_thread_flag(tsk, TIF_32BIT))
+		unmapped_base = TASK_UNMAPPED_BASE_USER32;
+	else
+		unmapped_base = TASK_UNMAPPED_BASE_USER64;
+
+	unmapped_base_esid = GET_ESID(unmapped_base);
+
+#if 0
+	printk("%d preload pc %p %lx %lx\n", smp_processor_id(), tsk->thread.regs, pc, pc_esid);
+	printk("%d preload stack %p %lx %lx\n", smp_processor_id(), tsk->thread.regs, stack, stack_esid);
+	printk("%d preload unmapped base %p %lx %lx\n", smp_processor_id(), tsk->thread.regs, unmapped_base, unmapped_base_esid);
+#endif
 
-	if (!(cur_cpu_spec->cpu_features & CPU_FTR_SLB)) {
-		/* Order update */
-		asm volatile("sync" : : : "memory");
-	}
+	if (!IS_VALID_EA(pc) || (REGION_ID(pc) >= KERNEL_REGION_ID))
+		return;
+	vsid = get_vsid(mm->context, pc);
+	__slb_allocate(pc_esid, vsid, 0, mm->context);
+
+	if (pc_esid == stack_esid)
+		return;
+
+	if (!IS_VALID_EA(stack) || (REGION_ID(stack) >= KERNEL_REGION_ID))
+		return;
+	vsid = get_vsid(mm->context, stack);
+	__slb_allocate(stack_esid, vsid, 0, mm->context);
+
+	if (pc_esid == unmapped_base_esid || stack_esid == unmapped_base_esid)
+		return;
+
+	if (!IS_VALID_EA(unmapped_base) ||
+	    (REGION_ID(unmapped_base) >= KERNEL_REGION_ID))
+		return;
+	vsid = get_vsid(mm->context, unmapped_base);
+	__slb_allocate(unmapped_base_esid, vsid, 0, mm->context);
 }
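preload_slb() above only touches the program counter, stack and TASK_UNMAPPED_BASE segments and skips duplicates by comparing ESIDs. A tiny standalone demo of that dedupe, assuming GET_ESID(ea) behaves as ea >> 28 and using made-up user addresses:

	#include <stdio.h>

	#define SID_SHIFT 28
	#define GET_ESID(ea) ((ea) >> SID_SHIFT)

	int main(void)
	{
		unsigned long pc = 0x0000000010002340UL;	/* text */
		unsigned long stack = 0x000000001ffff000UL;	/* same 256MB segment as pc */
		unsigned long unmapped_base = 0x0000000040000000UL;

		printf("preload esid %lx for pc\n", GET_ESID(pc));

		if (GET_ESID(stack) != GET_ESID(pc))
			printf("preload esid %lx for stack\n", GET_ESID(stack));
		else
			printf("stack shares the pc segment, skipped\n");

		if (GET_ESID(unmapped_base) != GET_ESID(pc) &&
		    GET_ESID(unmapped_base) != GET_ESID(stack))
			printf("preload esid %lx for mmap base\n", GET_ESID(unmapped_base));

		return 0;
	}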
 
 /* Flush all user entries from the segment table of the current processor. */
-void flush_stab(struct task_struct *tsk, struct mm_struct *mm)
+void flush_slb(struct task_struct *tsk, struct mm_struct *mm)
 {
-	if (cur_cpu_spec->cpu_features & CPU_FTR_SLB) {
-		/*
-		 * XXX disable 32bit slb invalidate optimisation until we fix
-		 * the issue where a 32bit app execed out of a 64bit app can
-		 * cause segments above 4GB not to be flushed - Anton
-		 */
-		if (0 && !STAB_PRESSURE && test_thread_flag(TIF_32BIT)) {
-			union {
-				unsigned long word0;
-				slb_dword0 data;
-			} esid_data;
-			unsigned long esid;
-
-			asm volatile("isync" : : : "memory");
-			for (esid = 0; esid < 16; esid++) {
-				esid_data.word0 = 0;
-				esid_data.data.esid = esid;
-				asm volatile("slbie %0" : : "r" (esid_data));
-			}
-			asm volatile("isync" : : : "memory");
-		} else {
-			asm volatile("isync; slbia; isync":::"memory");
-		}
+	unsigned long *offset = &__get_cpu_var(slb_cache_ptr);
 
-		PMC_SW_PROCESSOR(stab_invalidations);
-	} else {
-		STE *stab = (STE *) get_paca()->xStab_data.virt;
-		STE *ste;
-		unsigned long flags;
+	/* fix off by one here */
+	if (*offset <= NR_SLB_CACHE_ENTRIES) {
+		int i;
+		union {
+			unsigned long word0;
+			slb_dword0 data;
+		} esid_data;
 
-		/* Force previous translations to complete. DRENG */
 		asm volatile("isync" : : : "memory");
-
-		local_irq_save(flags);
-		if (get_paca()->stab_cache_pointer != 0xff && !STAB_PRESSURE) {
-			int i;
-			unsigned char *segments = get_paca()->xSegments;
-
-			for (i = 0; i < get_paca()->stab_cache_pointer; i++) {
-				ste = stab + segments[i];
-				ste->dw0.dw0.v = 0;
-				PMC_SW_PROCESSOR(stab_invalidations);
-			}
-
-#if USE_SLBIE_ON_STAB
-			asm volatile("sync":::"memory");
-			for (i = 0; i < get_paca()->stab_cache_pointer; i++) {
-				ste = stab + segments[i];
-				asm volatile("slbie %0" : :
-					"r" (ste->dw0.dw0.esid << SID_SHIFT));
-			}
-			asm volatile("sync":::"memory");
-#else
-			asm volatile("sync; slbia; sync":::"memory");
+		for (i = 0; i < *offset; i++) {
+			esid_data.word0 = 0;
+			esid_data.data.esid = __get_cpu_var(slb_cache[i]);
+			asm volatile("slbie %0" : : "r" (esid_data));
+#if 0
+			printk("%d shoot esid %lx\n", smp_processor_id(),
+				__get_cpu_var(slb_cache[i]));
 #endif
-
-		} else {
-			unsigned long entry;
-
-			/* Invalidate all entries. */
-			ste = stab;
-
-			/* Never flush the first entry. */
-			ste += 1;
-			for (entry = 1;
-			     entry < (PAGE_SIZE / sizeof(STE));
-			     entry++, ste++) {
-				unsigned long ea;
-				ea = ste->dw0.dw0.esid << SID_SHIFT;
-				if (STAB_PRESSURE || ea < KERNELBASE) {
-					ste->dw0.dw0.v = 0;
-					PMC_SW_PROCESSOR(stab_invalidations);
-				}
-			}
-
-			asm volatile("sync; slbia; sync":::"memory");
 		}
-
-		get_paca()->stab_cache_pointer = 0;
-		local_irq_restore(flags);
+		asm volatile("isync" : : : "memory");
+	} else {
+		asm volatile("isync; slbia; isync":::"memory");
 	}
 
-	if (ppc64_stab_preload)
-		preload_stab(tsk, mm);
+	*offset = 0;
+
+#if 0 /* convert to per cpu variable */
+	PMC_SW_PROCESSOR(stab_invalidations);
+#endif
+
+	preload_slb(tsk, mm);
 }
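flush_slb() above is the consumer of the per-cpu cache sketched earlier: if the cache never overflowed it slbies each recorded user ESID, otherwise it falls back to slbia. A reduced C sketch of that decision; the <= test mirrors the hunk, and with the record-side rule a pointer equal to the array size still means nothing was lost:

	#include <stdio.h>

	#define NR_SLB_CACHE_ENTRIES 8

	static long slb_cache[NR_SLB_CACHE_ENTRIES] = { 0x1, 0x4, 0x7 };
	static long slb_cache_ptr = 3;

	static void flush_user_slb(void)
	{
		if (slb_cache_ptr <= NR_SLB_CACHE_ENTRIES) {
			/* cache did not overflow: at most N esids were recorded */
			for (long i = 0; i < slb_cache_ptr; i++)
				printf("slbie esid %lx\n", slb_cache[i]);
		} else {
			printf("cache overflowed: slbia\n");
		}
		slb_cache_ptr = 0;
	}

	int main(void)
	{
		flush_user_slb();
		return 0;
	}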
===== include/asm-ppc64/mmu_context.h 1.7 vs edited =====
--- 1.7/include/asm-ppc64/mmu_context.h	Sun Sep  7 11:24:09 2003
+++ edited/include/asm-ppc64/mmu_context.h	Fri Dec 12 05:56:08 2003
@@ -5,6 +5,7 @@
 #include
 #include
 #include
+#include
 #include
 
 /*
@@ -134,6 +135,7 @@
 }
 
 extern void flush_stab(struct task_struct *tsk, struct mm_struct *mm);
+extern void flush_slb(struct task_struct *tsk, struct mm_struct *mm);
 
 /*
  * switch_mm is the entry point called from the architecture independent
@@ -143,7 +145,16 @@
 switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	  struct task_struct *tsk)
 {
-	flush_stab(tsk, next);
+	/* No need to flush userspace segments if the mm doesnt change */
+	if (prev == next)
+		return;
+#if 0
+	printk("switch_mm %p -> %p tsk %p (%s)\n", prev, next, tsk, tsk->comm);
+#endif
+	if (cur_cpu_spec->cpu_features & CPU_FTR_SLB)
+		flush_slb(tsk, next);
+	else
+		flush_stab(tsk, next);
 	cpu_set(smp_processor_id(), next->cpu_vm_mask);
 }
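The switch_mm() change is what ties the two flush routines in: skip the flush entirely when the mm does not change, otherwise pick the SLB or STAB variant by CPU feature. A stripped-down C sketch of that shape, with the feature test and both flushes stubbed out and only the control flow carried over:

	#include <stdio.h>

	struct mm { int id; };

	static int cpu_has_slb(void) { return 1; }	/* stand-in for the CPU_FTR_SLB test */
	static void flush_slb_stub(struct mm *next) { printf("flush_slb for mm %d\n", next->id); }
	static void flush_stab_stub(struct mm *next) { printf("flush_stab for mm %d\n", next->id); }

	static void switch_mm_sketch(struct mm *prev, struct mm *next)
	{
		/* Same address space: the user segments are still the right ones. */
		if (prev == next)
			return;

		if (cpu_has_slb())
			flush_slb_stub(next);
		else
			flush_stab_stub(next);
	}

	int main(void)
	{
		struct mm a = { 1 }, b = { 2 };

		switch_mm_sketch(&a, &a);	/* no flush */
		switch_mm_sketch(&a, &b);	/* flush */
		return 0;
	}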