From paulus at samba.org Sun Aug 1 05:27:21 2004 From: paulus at samba.org (Paul Mackerras) Date: Sat, 31 Jul 2004 14:27:21 -0500 Subject: [RFC][PATCH] ppc64: better handling of H_ENTER failures In-Reply-To: <1091164951.2077.34.camel@gaston> References: <1091164951.2077.34.camel@gaston> Message-ID: <16651.62105.156445.534596@cargo.ozlabs.ibm.com> Benjamin Herrenschmidt writes: > This patch changes the hash insertion routines to return an error > instead of calling panic() when HV refuses to insert a HPTE. Looks good, do you have a test program? It should be possible to provoke the error with xmon -m /dev/mem, if nothing else. :) Paul. ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From benh at kernel.crashing.org Sun Aug 1 11:52:04 2004 From: benh at kernel.crashing.org (Benjamin Herrenschmidt) Date: Sun, 01 Aug 2004 11:52:04 +1000 Subject: [RFC][PATCH] ppc64: better handling of H_ENTER failures In-Reply-To: <16651.62105.156445.534596@cargo.ozlabs.ibm.com> References: <1091164951.2077.34.camel@gaston> <16651.62105.156445.534596@cargo.ozlabs.ibm.com> Message-ID: <1091325123.7389.45.camel@gaston> On Sun, 2004-08-01 at 05:27, Paul Mackerras wrote: > Benjamin Herrenschmidt writes: > > > This patch changes the hash insertion routines to return an error > > instead of calling panic() when HV refuses to insert a HPTE. > > > Looks good, do you have a test program? It should be possible to > provoke the error with xmon -m /dev/mem, if nothing else. :) I had a test case with the VGA driver back in Austin; I didn't test from userland. Do you have a box at hand where HV will refuse a HPTE insertion for legacy ISA space? That's probably the easiest way to trigger it. Ben. ** Sent via the linuxppc64-dev mail list. 
See http://lists.linuxppc.org/ From anton at samba.org Sun Aug 1 13:26:16 2004 From: anton at samba.org (Anton Blanchard) Date: Sun, 1 Aug 2004 13:26:16 +1000 Subject: [patch 3/4] Rework secondary SMT thread setup at boot In-Reply-To: <200407302145.i6ULjNc0063654@austin.ibm.com> References: <200407302145.i6ULjNc0063654@austin.ibm.com> Message-ID: <20040801032616.GE30253@krispykreme> Hi Nathan, > Our (ab)use of cpu_possible_map in setup_system to start secondary SMT > threads bothers me. Mark such threads in cpu_possible_map during > early boot; let RTAS tell us which present cpus are still offline > later so we can start them. I worry that some machines may not have a query-cpu-stopped-state rtas call... just checked and the s7a doesnt. If we fix query_cpu_stopped to not BUG and instead return error when it doesnt exist that should work. > I'm not totally sure about this one, it might be better to set up > cpu_sibling_map in prom_hold_cpus and use that in setup_system. > > Signed-off-by: Nathan Lynch > > > --- > > > diff -puN arch/ppc64/kernel/setup.c~ppc64-fix-secondary-smt-thread-setup arch/ppc64/kernel/setup.c > --- 2.6.8-rc2-mm1/arch/ppc64/kernel/setup.c~ppc64-fix-secondary-smt-thread-setup 2004-07-30 16:32:16.000000000 -0500 > +++ 2.6.8-rc2-mm1-nathanl/arch/ppc64/kernel/setup.c 2004-07-30 16:32:16.000000000 -0500 > @@ -232,16 +232,17 @@ void setup_system(unsigned long r3, unsi > chrp_init(r3, r4, r5, r6, r7); > > #ifdef CONFIG_SMP > - /* Start secondary threads on SMT systems */ > - for (i = 0; i < NR_CPUS; i++) { > - if (cpu_available(i) && !cpu_possible(i)) { > + /* Start secondary threads on SMT systems; primary threads > + * are already in the running state. 
> + */ > + for_each_present_cpu(i) { > + if (query_cpu_stopped > + (get_hard_smp_processor_id(i)) == 0) { > printk("%16.16x : starting thread\n", i); > rtas_call(rtas_token("start-cpu"), 3, 1, &ret, > get_hard_smp_processor_id(i), > (u32)*((unsigned long *)pseries_secondary_smp_init), > i); > - cpu_set(i, cpu_possible_map); > - systemcfg->processorCount++; > } > } > #endif /* CONFIG_SMP */ > diff -puN arch/ppc64/kernel/prom.c~ppc64-fix-secondary-smt-thread-setup arch/ppc64/kernel/prom.c > --- 2.6.8-rc2-mm1/arch/ppc64/kernel/prom.c~ppc64-fix-secondary-smt-thread-setup 2004-07-30 16:32:16.000000000 -0500 > +++ 2.6.8-rc2-mm1-nathanl/arch/ppc64/kernel/prom.c 2004-07-30 16:32:16.000000000 -0500 > @@ -1076,6 +1076,8 @@ next: > cpu_set(cpuid, RELOC(cpu_available_map)); > cpu_set(cpuid, RELOC(cpu_present_at_boot)); > cpu_set(cpuid, RELOC(cpu_present_map)); > + cpu_set(cpuid, RELOC(cpu_possible_map)); > + _systemcfg->processorCount++; > prom_printf("available\n"); > } else { > prom_printf("not available\n"); > diff -puN arch/ppc64/kernel/smp.c~ppc64-fix-secondary-smt-thread-setup arch/ppc64/kernel/smp.c > --- 2.6.8-rc2-mm1/arch/ppc64/kernel/smp.c~ppc64-fix-secondary-smt-thread-setup 2004-07-30 16:32:16.000000000 -0500 > +++ 2.6.8-rc2-mm1-nathanl/arch/ppc64/kernel/smp.c 2004-07-30 16:32:16.000000000 -0500 > @@ -228,7 +228,6 @@ static void __devinit smp_openpic_setup_ > do_openpic_setup_cpu(); > } > > -#ifdef CONFIG_HOTPLUG_CPU > /* Get state of physical CPU. > * Return codes: > * 0 - The processor is in the RTAS stopped state > @@ -237,7 +236,7 @@ static void __devinit smp_openpic_setup_ > * -1 - Hardware Error > * -2 - Hardware Busy, Try again later. 
> */ > -static int query_cpu_stopped(unsigned int pcpu) > +int query_cpu_stopped(unsigned int pcpu) > { > int cpu_status; > int status, qcss_tok; > @@ -254,6 +253,8 @@ static int query_cpu_stopped(unsigned in > return cpu_status; > } > > +#ifdef CONFIG_HOTPLUG_CPU > + > int __cpu_disable(void) > { > /* FIXME: go put this in a header somewhere */ > diff -puN include/asm-ppc64/smp.h~ppc64-fix-secondary-smt-thread-setup include/asm-ppc64/smp.h > --- 2.6.8-rc2-mm1/include/asm-ppc64/smp.h~ppc64-fix-secondary-smt-thread-setup 2004-07-30 16:32:16.000000000 -0500 > +++ 2.6.8-rc2-mm1-nathanl/include/asm-ppc64/smp.h 2004-07-30 16:32:16.000000000 -0500 > @@ -73,6 +73,7 @@ void smp_init_pSeries(void); > extern int __cpu_disable(void); > extern void __cpu_die(unsigned int cpu); > extern void cpu_die(void) __attribute__((noreturn)); > +extern int query_cpu_stopped(unsigned int pcpu); > #ifdef CONFIG_SCHED_SMT > extern cpumask_t cpu_sibling_map[NR_CPUS]; > #endif > > _ ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From anton at samba.org Sun Aug 1 13:49:20 2004 From: anton at samba.org (Anton Blanchard) Date: Sun, 1 Aug 2004 13:49:20 +1000 Subject: [patch 4/4] Remove unnecessary cpu maps (available, present_at_boot) In-Reply-To: <200407302145.i6ULjXc0055970@austin.ibm.com> References: <200407302145.i6ULjXc0055970@austin.ibm.com> Message-ID: <20040801034920.GF30253@krispykreme> Hi Nathan, > With cpu_present_map, we don't need these any longer. Thanks for all those patches. I tested them on current BK together with Srivatsa's cpu up race and managed to get an oops: cpu 1 (hwid 1) Ready to die... cpu 3 (hwid 3) Ready to die... cpu 0x5: Vector: 300 (Data Access) at [c000000002d2f4b0] pc: c00000000004b794: .find_busiest_group+0x290/0x450 lr: c00000000004b6c0: .find_busiest_group+0x1bc/0x450 sp: c000000002d2f730 msr: 8000000000001032 dar: 18 dsisr: 40000000 current = 0xc000000002909320 paca = 0xc00000000054ed00 pid = 2928, comm = kstopmachine enter ? 
for help 5:mon> Which is probably due to the SMT scheduler, at least it disappeared after I disabled the SMT scheduler config option. So it should go away once we get your hotplug fixes for sched domains merged. These 4 look good to merge once we work out the query cpu state rtas call issue. Anton ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From anton at samba.org Sun Aug 1 13:57:04 2004 From: anton at samba.org (Anton Blanchard) Date: Sun, 1 Aug 2004 13:57:04 +1000 Subject: [PATCH] [ppc64] fix hotplug irq migration code Message-ID: <20040801035704.GG30253@krispykreme> Hi, In migrate_irqs_away we werent converting a virtual irq to a real one. We ended up passing the wrong irq numbers to the hypervisor and migration of affinitised irqs on cpu hot unplug didnt work. Also clarify the rtas_stop_self printk. Signed-off-by: Anton Blanchard diff -puN arch/ppc64/kernel/xics.c~migrate-irqs-away arch/ppc64/kernel/xics.c --- foobar2/arch/ppc64/kernel/xics.c~migrate-irqs-away 2004-08-01 11:44:41.412353159 +1000 +++ foobar2-anton/arch/ppc64/kernel/xics.c 2004-08-01 11:53:43.050041725 +1000 @@ -657,9 +657,7 @@ void xics_migrate_irqs_away(void) int set_indicator = rtas_token("set-indicator"); const unsigned int giqs = 9005UL; /* Global Interrupt Queue Server */ int status = 0; - unsigned int irq, cpu = smp_processor_id(); - int xics_status[2]; - unsigned long flags; + unsigned int irq, virq, cpu = smp_processor_id(); BUG_ON(set_indicator == RTAS_UNKNOWN_SERVICE); @@ -676,12 +674,20 @@ void xics_migrate_irqs_away(void) ops->cppr_info(cpu, DEFAULT_PRIORITY); iosync(); - printk(KERN_WARNING "HOTPLUG: Migrating IRQs away\n"); - for_each_irq(irq) { - irq_desc_t *desc = get_irq_desc(irq); + for_each_irq(virq) { + irq_desc_t *desc; + int xics_status[2]; + unsigned long flags; + + /* We cant set affinity on ISA interrupts */ + if (virq < irq_offset_value()) + continue; + + desc = get_irq_desc(virq); + irq = virt_irq_to_real(irq_offset_down(virq)); /* We need 
to get IPIs still. */ - if (irq_offset_down(irq) == XICS_IPI) + if (irq == XICS_IPI || irq == NO_IRQ) continue; /* We only need to migrate enabled IRQS */ @@ -696,7 +702,7 @@ void xics_migrate_irqs_away(void) if (status) { printk(KERN_ERR "migrate_irqs_away: irq=%d " "ibm,get-xive returns %d\n", - irq, status); + virq, status); goto unlock; } @@ -709,21 +715,20 @@ void xics_migrate_irqs_away(void) goto unlock; printk(KERN_WARNING "IRQ %d affinity broken off cpu %u\n", - irq, cpu); + virq, cpu); /* Reset affinity to all cpus */ xics_status[0] = default_distrib_server; - status = rtas_call(ibm_set_xive, 3, 1, NULL, - irq, xics_status[0], xics_status[1]); + status = rtas_call(ibm_set_xive, 3, 1, NULL, irq, + xics_status[0], xics_status[1]); if (status) printk(KERN_ERR "migrate_irqs_away irq=%d " "ibm,set-xive returns %d\n", - irq, status); + virq, status); unlock: spin_unlock_irqrestore(&desc->lock, flags); } - } #endif diff -L xics.c -puN /dev/null /dev/null diff -puN arch/ppc64/kernel/rtas.c~migrate-irqs-away arch/ppc64/kernel/rtas.c --- foobar2/arch/ppc64/kernel/rtas.c~migrate-irqs-away 2004-08-01 12:39:46.199965774 +1000 +++ foobar2-anton/arch/ppc64/kernel/rtas.c 2004-08-01 12:40:28.722010572 +1000 @@ -500,7 +500,7 @@ void rtas_stop_self(void) BUG_ON(rtas_args->token == RTAS_UNKNOWN_SERVICE); - printk("%u %u Ready to die...\n", + printk("cpu %u (hwid %u) Ready to die...\n", smp_processor_id(), hard_smp_processor_id()); enter_rtas(__pa(rtas_args)); _ ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From anton at samba.org Sun Aug 1 14:59:53 2004 From: anton at samba.org (Anton Blanchard) Date: Sun, 1 Aug 2004 14:59:53 +1000 Subject: [patch 4/4] Remove unnecessary cpu maps (available, present_at_boot) In-Reply-To: <20040801034920.GF30253@krispykreme> References: <200407302145.i6ULjXc0055970@austin.ibm.com> <20040801034920.GF30253@krispykreme> Message-ID: <20040801045953.GH30253@krispykreme> Hi, > Thanks for all those patches. 
I tested them on current BK together with > Srivatsa's cpu up race and managed to get an oops: > > cpu 1 (hwid 1) Ready to die... > cpu 3 (hwid 3) Ready to die... > cpu 0x5: Vector: 300 (Data Access) at [c000000002d2f4b0] > pc: c00000000004b794: .find_busiest_group+0x290/0x450 > lr: c00000000004b6c0: .find_busiest_group+0x1bc/0x450 > sp: c000000002d2f730 > msr: 8000000000001032 > dar: 18 > dsisr: 40000000 > current = 0xc000000002909320 > paca = 0xc00000000054ed00 > pid = 2928, comm = kstopmachine > enter ? for help > 5:mon> > > Which is probably due to the SMT scheduler, at least it disappeared > after I disabled the SMT scheduler config option. So it should go away > once we get your hotplug fixes for sched domains merged. Managed to make it pop with SMT sched off. Still, it's probably sched domains topology setup/teardown, I think. Weren't we using stop machine to synchronise updates to the sched domains topology on sles9? Anton cpu 5 (hwid 5) Ready to die... cpu 0x7: Vector: 300 (Data Access) at [c00000000230b4b0] pc: c000000000049224: .find_busiest_group+0x290/0x450 lr: c000000000049150: .find_busiest_group+0x1bc/0x450 sp: c00000000230b730 msr: 8000000000001032 dar: 18 dsisr: 40000000 current = 0xc0000000029ce9b0 paca = 0xc00000000054ff00 pid = 3438, comm = kstopmachine enter ? 
for help 7:mon> t [c00000000230b730] c00000000230b7c0 (unreliable) [c00000000230b850] c00000000004aff0 .rebalance_tick+0x12c/0x2d4 [c00000000230b940] c00000000005bf98 .update_process_times+0xc4/0x154 [c00000000230b9e0] c000000000038dc0 .smp_local_timer_interrupt+0x3c/0x58 [c00000000230ba50] c00000000001529c .timer_interrupt+0x11c/0x3fc [c00000000230bb30] c00000000000a2b4 Decrementer_common+0xb4/0x100 --- Exception: 901 (Decrementer) at c000000000073ad0 .do_stop+0x26c/0x27c [c00000000230be20] c000000000073ab4 .do_stop+0x250/0x27c (unreliable) [c00000000230bed0] c00000000006b1c0 .kthread+0x178/0x1c8 [c00000000230bf90] c000000000017dac .kernel_thread+0x4c/0x68 ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From david at gibson.dropbear.id.au Tue Aug 3 12:12:35 2004 From: david at gibson.dropbear.id.au (David Gibson) Date: Tue, 3 Aug 2004 12:12:35 +1000 Subject: [0/5] STAB cleanup Message-ID: <20040803021235.GB3056@zax> This series of patches clean up the segment table code in the PPC64 kernel. I've given the patches basic testing on POWER3 (RS/6000 270) and RS64 iSeries. Paul, if you don't see any problems with these, please forward upstream. The five patches are: 1/5 stabs-move-to-mm - Move stab code to arch/ppc64/mm 2/5 stabs-kill-bitfields - Remove ugly bitfields 3/5 stabs-random-cleanups - Various simple code cleanups 4/5 stabs-checks-in-raw-ste-allocate - Remove duplication of various address check 5/5 stabs-switch-stab - Give flush_stab() a better name -- David Gibson | For every complex problem there is a david AT gibson.dropbear.id.au | solution which is simple, neat and | wrong. http://www.ozlabs.org/people/dgibson ** Sent via the linuxppc64-dev mail list. 
See http://lists.linuxppc.org/ From david at gibson.dropbear.id.au Tue Aug 3 12:13:28 2004 From: david at gibson.dropbear.id.au (David Gibson) Date: Tue, 3 Aug 2004 12:13:28 +1000 Subject: [1/5] STAB cleanup - move to arch/ppc64/mm In-Reply-To: <20040803021235.GB3056@zax> References: <20040803021235.GB3056@zax> Message-ID: <20040803021328.GC3056@zax> Move the segment table handling code from arch/ppc64/kernel to arch/ppc64/mm where it better belongs. This patch doesn't actually change the code at all. Signed-off-by: David Gibson Index: working-2.6/arch/ppc64/kernel/Makefile =================================================================== --- working-2.6.orig/arch/ppc64/kernel/Makefile +++ working-2.6/arch/ppc64/kernel/Makefile @@ -7,7 +7,7 @@ obj-y := setup.o entry.o traps.o irq.o idle.o dma.o \ time.o process.o signal.o syscalls.o misc.o ptrace.o \ - align.o semaphore.o bitops.o stab.o pacaData.o \ + align.o semaphore.o bitops.o pacaData.o \ udbg.o binfmt_elf32.o sys_ppc32.o ioctl32.o \ ptrace32.o signal32.o rtc.o init_task.o \ lmb.o cputable.o cpu_setup_power4.o idle_power4.o \ Index: working-2.6/arch/ppc64/kernel/stab.c =================================================================== --- working-2.6.orig/arch/ppc64/kernel/stab.c +++ /dev/null @@ -1,281 +0,0 @@ -/* - * PowerPC64 Segment Translation Support. - * - * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com - * Copyright (c) 2001 Dave Engebretsen - * - * Copyright (C) 2002 Anton Blanchard , IBM - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#include -#include -#include -#include -#include -#include -#include - -static int make_ste(unsigned long stab, unsigned long esid, - unsigned long vsid); - -void slb_initialize(void); - -/* - * Build an entry for the base kernel segment and put it into - * the segment table or SLB. All other segment table or SLB - * entries are faulted in. - */ -void stab_initialize(unsigned long stab) -{ - unsigned long vsid = get_kernel_vsid(KERNELBASE); - - if (cur_cpu_spec->cpu_features & CPU_FTR_SLB) { - slb_initialize(); - } else { - asm volatile("isync; slbia; isync":::"memory"); - make_ste(stab, GET_ESID(KERNELBASE), vsid); - - /* Order update */ - asm volatile("sync":::"memory"); - } -} - -/* Both the segment table and SLB code uses the following cache */ -#define NR_STAB_CACHE_ENTRIES 8 -DEFINE_PER_CPU(long, stab_cache_ptr); -DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]); - -/* - * Segment table stuff - */ - -/* - * Create a segment table entry for the given esid/vsid pair. - */ -static int make_ste(unsigned long stab, unsigned long esid, unsigned long vsid) -{ - unsigned long entry, group, old_esid, castout_entry, i; - unsigned int global_entry; - STE *ste, *castout_ste; - unsigned long kernel_segment = (REGION_ID(esid << SID_SHIFT) != - USER_REGION_ID); - - /* Search the primary group first. */ - global_entry = (esid & 0x1f) << 3; - ste = (STE *)(stab | ((esid & 0x1f) << 7)); - - /* Find an empty entry, if one exists. */ - for (group = 0; group < 2; group++) { - for (entry = 0; entry < 8; entry++, ste++) { - if (!(ste->dw0.dw0.v)) { - ste->dw0.dword0 = 0; - ste->dw1.dword1 = 0; - ste->dw1.dw1.vsid = vsid; - ste->dw0.dw0.esid = esid; - ste->dw0.dw0.kp = 1; - if (!kernel_segment) - ste->dw0.dw0.ks = 1; - asm volatile("eieio":::"memory"); - ste->dw0.dw0.v = 1; - return (global_entry | entry); - } - } - /* Now search the secondary group. 
*/ - global_entry = ((~esid) & 0x1f) << 3; - ste = (STE *)(stab | (((~esid) & 0x1f) << 7)); - } - - /* - * Could not find empty entry, pick one with a round robin selection. - * Search all entries in the two groups. - */ - castout_entry = get_paca()->stab_rr; - for (i = 0; i < 16; i++) { - if (castout_entry < 8) { - global_entry = (esid & 0x1f) << 3; - ste = (STE *)(stab | ((esid & 0x1f) << 7)); - castout_ste = ste + castout_entry; - } else { - global_entry = ((~esid) & 0x1f) << 3; - ste = (STE *)(stab | (((~esid) & 0x1f) << 7)); - castout_ste = ste + (castout_entry - 8); - } - - /* Dont cast out the first kernel segment */ - if (castout_ste->dw0.dw0.esid != GET_ESID(KERNELBASE)) - break; - - castout_entry = (castout_entry + 1) & 0xf; - } - - get_paca()->stab_rr = (castout_entry + 1) & 0xf; - - /* Modify the old entry to the new value. */ - - /* Force previous translations to complete. DRENG */ - asm volatile("isync" : : : "memory"); - - castout_ste->dw0.dw0.v = 0; - asm volatile("sync" : : : "memory"); /* Order update */ - - castout_ste->dw0.dword0 = 0; - castout_ste->dw1.dword1 = 0; - castout_ste->dw1.dw1.vsid = vsid; - old_esid = castout_ste->dw0.dw0.esid; - castout_ste->dw0.dw0.esid = esid; - castout_ste->dw0.dw0.kp = 1; - if (!kernel_segment) - castout_ste->dw0.dw0.ks = 1; - asm volatile("eieio" : : : "memory"); /* Order update */ - castout_ste->dw0.dw0.v = 1; - asm volatile("slbie %0" : : "r" (old_esid << SID_SHIFT)); - /* Ensure completion of slbie */ - asm volatile("sync" : : : "memory"); - - return (global_entry | (castout_entry & 0x7)); -} - -static inline void __ste_allocate(unsigned long esid, unsigned long vsid) -{ - unsigned char stab_entry; - unsigned long offset; - int region_id = REGION_ID(esid << SID_SHIFT); - - stab_entry = make_ste(get_paca()->stab_addr, esid, vsid); - - if (region_id != USER_REGION_ID) - return; - - offset = __get_cpu_var(stab_cache_ptr); - if (offset < NR_STAB_CACHE_ENTRIES) - __get_cpu_var(stab_cache[offset++]) = stab_entry; 
- else - offset = NR_STAB_CACHE_ENTRIES+1; - __get_cpu_var(stab_cache_ptr) = offset; -} - -/* - * Allocate a segment table entry for the given ea. - */ -int ste_allocate(unsigned long ea) -{ - unsigned long vsid, esid; - mm_context_t context; - - /* Check for invalid effective addresses. */ - if (!IS_VALID_EA(ea)) - return 1; - - /* Kernel or user address? */ - if (REGION_ID(ea) >= KERNEL_REGION_ID) { - vsid = get_kernel_vsid(ea); - context = KERNEL_CONTEXT(ea); - } else { - if (!current->mm) - return 1; - - context = current->mm->context; - vsid = get_vsid(context.id, ea); - } - - esid = GET_ESID(ea); - __ste_allocate(esid, vsid); - /* Order update */ - asm volatile("sync":::"memory"); - - return 0; -} - -/* - * preload some userspace segments into the segment table. - */ -static void preload_stab(struct task_struct *tsk, struct mm_struct *mm) -{ - unsigned long pc = KSTK_EIP(tsk); - unsigned long stack = KSTK_ESP(tsk); - unsigned long unmapped_base; - unsigned long pc_esid = GET_ESID(pc); - unsigned long stack_esid = GET_ESID(stack); - unsigned long unmapped_base_esid; - unsigned long vsid; - - if (test_tsk_thread_flag(tsk, TIF_32BIT)) - unmapped_base = TASK_UNMAPPED_BASE_USER32; - else - unmapped_base = TASK_UNMAPPED_BASE_USER64; - - unmapped_base_esid = GET_ESID(unmapped_base); - - if (!IS_VALID_EA(pc) || (REGION_ID(pc) >= KERNEL_REGION_ID)) - return; - vsid = get_vsid(mm->context.id, pc); - __ste_allocate(pc_esid, vsid); - - if (pc_esid == stack_esid) - return; - - if (!IS_VALID_EA(stack) || (REGION_ID(stack) >= KERNEL_REGION_ID)) - return; - vsid = get_vsid(mm->context.id, stack); - __ste_allocate(stack_esid, vsid); - - if (pc_esid == unmapped_base_esid || stack_esid == unmapped_base_esid) - return; - - if (!IS_VALID_EA(unmapped_base) || - (REGION_ID(unmapped_base) >= KERNEL_REGION_ID)) - return; - vsid = get_vsid(mm->context.id, unmapped_base); - __ste_allocate(unmapped_base_esid, vsid); - - /* Order update */ - asm volatile("sync" : : : "memory"); -} - -/* 
Flush all user entries from the segment table of the current processor. */ -void flush_stab(struct task_struct *tsk, struct mm_struct *mm) -{ - STE *stab = (STE *) get_paca()->stab_addr; - STE *ste; - unsigned long offset = __get_cpu_var(stab_cache_ptr); - - /* Force previous translations to complete. DRENG */ - asm volatile("isync" : : : "memory"); - - if (offset <= NR_STAB_CACHE_ENTRIES) { - int i; - - for (i = 0; i < offset; i++) { - ste = stab + __get_cpu_var(stab_cache[i]); - ste->dw0.dw0.v = 0; - } - } else { - unsigned long entry; - - /* Invalidate all entries. */ - ste = stab; - - /* Never flush the first entry. */ - ste += 1; - for (entry = 1; - entry < (PAGE_SIZE / sizeof(STE)); - entry++, ste++) { - unsigned long ea; - ea = ste->dw0.dw0.esid << SID_SHIFT; - if (ea < KERNELBASE) { - ste->dw0.dw0.v = 0; - } - } - } - - asm volatile("sync; slbia; sync":::"memory"); - - __get_cpu_var(stab_cache_ptr) = 0; - - preload_stab(tsk, mm); -} Index: working-2.6/arch/ppc64/mm/stab.c =================================================================== --- /dev/null +++ working-2.6/arch/ppc64/mm/stab.c @@ -0,0 +1,281 @@ +/* + * PowerPC64 Segment Translation Support. + * + * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com + * Copyright (c) 2001 Dave Engebretsen + * + * Copyright (C) 2002 Anton Blanchard , IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include + +static int make_ste(unsigned long stab, unsigned long esid, + unsigned long vsid); + +void slb_initialize(void); + +/* + * Build an entry for the base kernel segment and put it into + * the segment table or SLB. All other segment table or SLB + * entries are faulted in. 
+ */ +void stab_initialize(unsigned long stab) +{ + unsigned long vsid = get_kernel_vsid(KERNELBASE); + + if (cur_cpu_spec->cpu_features & CPU_FTR_SLB) { + slb_initialize(); + } else { + asm volatile("isync; slbia; isync":::"memory"); + make_ste(stab, GET_ESID(KERNELBASE), vsid); + + /* Order update */ + asm volatile("sync":::"memory"); + } +} + +/* Both the segment table and SLB code uses the following cache */ +#define NR_STAB_CACHE_ENTRIES 8 +DEFINE_PER_CPU(long, stab_cache_ptr); +DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]); + +/* + * Segment table stuff + */ + +/* + * Create a segment table entry for the given esid/vsid pair. + */ +static int make_ste(unsigned long stab, unsigned long esid, unsigned long vsid) +{ + unsigned long entry, group, old_esid, castout_entry, i; + unsigned int global_entry; + STE *ste, *castout_ste; + unsigned long kernel_segment = (REGION_ID(esid << SID_SHIFT) != + USER_REGION_ID); + + /* Search the primary group first. */ + global_entry = (esid & 0x1f) << 3; + ste = (STE *)(stab | ((esid & 0x1f) << 7)); + + /* Find an empty entry, if one exists. */ + for (group = 0; group < 2; group++) { + for (entry = 0; entry < 8; entry++, ste++) { + if (!(ste->dw0.dw0.v)) { + ste->dw0.dword0 = 0; + ste->dw1.dword1 = 0; + ste->dw1.dw1.vsid = vsid; + ste->dw0.dw0.esid = esid; + ste->dw0.dw0.kp = 1; + if (!kernel_segment) + ste->dw0.dw0.ks = 1; + asm volatile("eieio":::"memory"); + ste->dw0.dw0.v = 1; + return (global_entry | entry); + } + } + /* Now search the secondary group. */ + global_entry = ((~esid) & 0x1f) << 3; + ste = (STE *)(stab | (((~esid) & 0x1f) << 7)); + } + + /* + * Could not find empty entry, pick one with a round robin selection. + * Search all entries in the two groups. 
+ */ + castout_entry = get_paca()->stab_rr; + for (i = 0; i < 16; i++) { + if (castout_entry < 8) { + global_entry = (esid & 0x1f) << 3; + ste = (STE *)(stab | ((esid & 0x1f) << 7)); + castout_ste = ste + castout_entry; + } else { + global_entry = ((~esid) & 0x1f) << 3; + ste = (STE *)(stab | (((~esid) & 0x1f) << 7)); + castout_ste = ste + (castout_entry - 8); + } + + /* Dont cast out the first kernel segment */ + if (castout_ste->dw0.dw0.esid != GET_ESID(KERNELBASE)) + break; + + castout_entry = (castout_entry + 1) & 0xf; + } + + get_paca()->stab_rr = (castout_entry + 1) & 0xf; + + /* Modify the old entry to the new value. */ + + /* Force previous translations to complete. DRENG */ + asm volatile("isync" : : : "memory"); + + castout_ste->dw0.dw0.v = 0; + asm volatile("sync" : : : "memory"); /* Order update */ + + castout_ste->dw0.dword0 = 0; + castout_ste->dw1.dword1 = 0; + castout_ste->dw1.dw1.vsid = vsid; + old_esid = castout_ste->dw0.dw0.esid; + castout_ste->dw0.dw0.esid = esid; + castout_ste->dw0.dw0.kp = 1; + if (!kernel_segment) + castout_ste->dw0.dw0.ks = 1; + asm volatile("eieio" : : : "memory"); /* Order update */ + castout_ste->dw0.dw0.v = 1; + asm volatile("slbie %0" : : "r" (old_esid << SID_SHIFT)); + /* Ensure completion of slbie */ + asm volatile("sync" : : : "memory"); + + return (global_entry | (castout_entry & 0x7)); +} + +static inline void __ste_allocate(unsigned long esid, unsigned long vsid) +{ + unsigned char stab_entry; + unsigned long offset; + int region_id = REGION_ID(esid << SID_SHIFT); + + stab_entry = make_ste(get_paca()->stab_addr, esid, vsid); + + if (region_id != USER_REGION_ID) + return; + + offset = __get_cpu_var(stab_cache_ptr); + if (offset < NR_STAB_CACHE_ENTRIES) + __get_cpu_var(stab_cache[offset++]) = stab_entry; + else + offset = NR_STAB_CACHE_ENTRIES+1; + __get_cpu_var(stab_cache_ptr) = offset; +} + +/* + * Allocate a segment table entry for the given ea. 
+ */ +int ste_allocate(unsigned long ea) +{ + unsigned long vsid, esid; + mm_context_t context; + + /* Check for invalid effective addresses. */ + if (!IS_VALID_EA(ea)) + return 1; + + /* Kernel or user address? */ + if (REGION_ID(ea) >= KERNEL_REGION_ID) { + vsid = get_kernel_vsid(ea); + context = KERNEL_CONTEXT(ea); + } else { + if (!current->mm) + return 1; + + context = current->mm->context; + vsid = get_vsid(context.id, ea); + } + + esid = GET_ESID(ea); + __ste_allocate(esid, vsid); + /* Order update */ + asm volatile("sync":::"memory"); + + return 0; +} + +/* + * preload some userspace segments into the segment table. + */ +static void preload_stab(struct task_struct *tsk, struct mm_struct *mm) +{ + unsigned long pc = KSTK_EIP(tsk); + unsigned long stack = KSTK_ESP(tsk); + unsigned long unmapped_base; + unsigned long pc_esid = GET_ESID(pc); + unsigned long stack_esid = GET_ESID(stack); + unsigned long unmapped_base_esid; + unsigned long vsid; + + if (test_tsk_thread_flag(tsk, TIF_32BIT)) + unmapped_base = TASK_UNMAPPED_BASE_USER32; + else + unmapped_base = TASK_UNMAPPED_BASE_USER64; + + unmapped_base_esid = GET_ESID(unmapped_base); + + if (!IS_VALID_EA(pc) || (REGION_ID(pc) >= KERNEL_REGION_ID)) + return; + vsid = get_vsid(mm->context.id, pc); + __ste_allocate(pc_esid, vsid); + + if (pc_esid == stack_esid) + return; + + if (!IS_VALID_EA(stack) || (REGION_ID(stack) >= KERNEL_REGION_ID)) + return; + vsid = get_vsid(mm->context.id, stack); + __ste_allocate(stack_esid, vsid); + + if (pc_esid == unmapped_base_esid || stack_esid == unmapped_base_esid) + return; + + if (!IS_VALID_EA(unmapped_base) || + (REGION_ID(unmapped_base) >= KERNEL_REGION_ID)) + return; + vsid = get_vsid(mm->context.id, unmapped_base); + __ste_allocate(unmapped_base_esid, vsid); + + /* Order update */ + asm volatile("sync" : : : "memory"); +} + +/* Flush all user entries from the segment table of the current processor. 
*/ +void flush_stab(struct task_struct *tsk, struct mm_struct *mm) +{ + STE *stab = (STE *) get_paca()->stab_addr; + STE *ste; + unsigned long offset = __get_cpu_var(stab_cache_ptr); + + /* Force previous translations to complete. DRENG */ + asm volatile("isync" : : : "memory"); + + if (offset <= NR_STAB_CACHE_ENTRIES) { + int i; + + for (i = 0; i < offset; i++) { + ste = stab + __get_cpu_var(stab_cache[i]); + ste->dw0.dw0.v = 0; + } + } else { + unsigned long entry; + + /* Invalidate all entries. */ + ste = stab; + + /* Never flush the first entry. */ + ste += 1; + for (entry = 1; + entry < (PAGE_SIZE / sizeof(STE)); + entry++, ste++) { + unsigned long ea; + ea = ste->dw0.dw0.esid << SID_SHIFT; + if (ea < KERNELBASE) { + ste->dw0.dw0.v = 0; + } + } + } + + asm volatile("sync; slbia; sync":::"memory"); + + __get_cpu_var(stab_cache_ptr) = 0; + + preload_stab(tsk, mm); +} Index: working-2.6/arch/ppc64/mm/Makefile =================================================================== --- working-2.6.orig/arch/ppc64/mm/Makefile +++ working-2.6/arch/ppc64/mm/Makefile @@ -4,6 +4,7 @@ EXTRA_CFLAGS += -mno-minimal-toc -obj-y := fault.o init.o imalloc.o hash_utils.o hash_low.o tlb.o slb_low.o slb.o +obj-y := fault.o init.o imalloc.o hash_utils.o hash_low.o tlb.o \ + slb_low.o slb.o stab.o obj-$(CONFIG_DISCONTIGMEM) += numa.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -- David Gibson | For every complex problem there is a david AT gibson.dropbear.id.au | solution which is simple, neat and | wrong. http://www.ozlabs.org/people/dgibson ** Sent via the linuxppc64-dev mail list. 
See http://lists.linuxppc.org/ From david at gibson.dropbear.id.au Tue Aug 3 12:13:47 2004 From: david at gibson.dropbear.id.au (David Gibson) Date: Tue, 3 Aug 2004 12:13:47 +1000 Subject: [2/5] STAB cleanup - kill bitfields In-Reply-To: <20040803021328.GC3056@zax> References: <20040803021235.GB3056@zax> <20040803021328.GC3056@zax> Message-ID: <20040803021347.GD3056@zax> Remove the overly verbose and hard to follow use of bitfields in the PPC64 segment table code, replacing it with explicit bitmask operations. Signed-off-by: David Gibson Index: working-2.6/arch/ppc64/mm/stab.c =================================================================== --- working-2.6.orig/arch/ppc64/mm/stab.c 2004-07-29 16:14:46.201804936 +1000 +++ working-2.6/arch/ppc64/mm/stab.c 2004-07-29 16:14:46.642871056 +1000 @@ -61,33 +61,32 @@ { unsigned long entry, group, old_esid, castout_entry, i; unsigned int global_entry; - STE *ste, *castout_ste; + struct stab_entry *ste, *castout_ste; unsigned long kernel_segment = (REGION_ID(esid << SID_SHIFT) != USER_REGION_ID); + unsigned long esid_data; /* Search the primary group first. */ global_entry = (esid & 0x1f) << 3; - ste = (STE *)(stab | ((esid & 0x1f) << 7)); + ste = (struct stab_entry *)(stab | ((esid & 0x1f) << 7)); /* Find an empty entry, if one exists. */ for (group = 0; group < 2; group++) { for (entry = 0; entry < 8; entry++, ste++) { - if (!(ste->dw0.dw0.v)) { - ste->dw0.dword0 = 0; - ste->dw1.dword1 = 0; - ste->dw1.dw1.vsid = vsid; - ste->dw0.dw0.esid = esid; - ste->dw0.dw0.kp = 1; - if (!kernel_segment) - ste->dw0.dw0.ks = 1; + if (!(ste->esid_data & STE_ESID_V)) { + ste->vsid_data = vsid << STE_VSID_SHIFT; asm volatile("eieio":::"memory"); - ste->dw0.dw0.v = 1; + esid_data = esid << SID_SHIFT; + esid_data |= STE_ESID_KP | STE_ESID_V; + if (! kernel_segment) + esid_data |= STE_ESID_KS; + ste->esid_data = esid_data; return (global_entry | entry); } } /* Now search the secondary group. 
*/ global_entry = ((~esid) & 0x1f) << 3; - ste = (STE *)(stab | (((~esid) & 0x1f) << 7)); + ste = (struct stab_entry *)(stab | (((~esid) & 0x1f) << 7)); } /* @@ -98,16 +97,16 @@ for (i = 0; i < 16; i++) { if (castout_entry < 8) { global_entry = (esid & 0x1f) << 3; - ste = (STE *)(stab | ((esid & 0x1f) << 7)); + ste = (struct stab_entry *)(stab | ((esid & 0x1f) << 7)); castout_ste = ste + castout_entry; } else { global_entry = ((~esid) & 0x1f) << 3; - ste = (STE *)(stab | (((~esid) & 0x1f) << 7)); + ste = (struct stab_entry *)(stab | (((~esid) & 0x1f) << 7)); castout_ste = ste + (castout_entry - 8); } /* Dont cast out the first kernel segment */ - if (castout_ste->dw0.dw0.esid != GET_ESID(KERNELBASE)) + if ((castout_ste->esid_data & ESID_MASK) != KERNELBASE) break; castout_entry = (castout_entry + 1) & 0xf; @@ -120,19 +119,21 @@ /* Force previous translations to complete. DRENG */ asm volatile("isync" : : : "memory"); - castout_ste->dw0.dw0.v = 0; + old_esid = castout_ste->esid_data >> SID_SHIFT; + castout_ste->esid_data = 0; /* Invalidate old entry */ + asm volatile("sync" : : : "memory"); /* Order update */ - castout_ste->dw0.dword0 = 0; - castout_ste->dw1.dword1 = 0; - castout_ste->dw1.dw1.vsid = vsid; - old_esid = castout_ste->dw0.dw0.esid; - castout_ste->dw0.dw0.esid = esid; - castout_ste->dw0.dw0.kp = 1; - if (!kernel_segment) - castout_ste->dw0.dw0.ks = 1; + castout_ste->vsid_data = vsid << STE_VSID_SHIFT; + asm volatile("eieio" : : : "memory"); /* Order update */ - castout_ste->dw0.dw0.v = 1; + + esid_data = esid << SID_SHIFT; + esid_data |= STE_ESID_KP | STE_ESID_V; + if (!kernel_segment) + esid_data |= STE_ESID_KS; + castout_ste->esid_data = esid_data; + asm volatile("slbie %0" : : "r" (old_esid << SID_SHIFT)); /* Ensure completion of slbie */ asm volatile("sync" : : : "memory"); @@ -240,8 +241,8 @@ /* Flush all user entries from the segment table of the current processor. 
*/ void flush_stab(struct task_struct *tsk, struct mm_struct *mm) { - STE *stab = (STE *) get_paca()->stab_addr; - STE *ste; + struct stab_entry *stab = (struct stab_entry *) get_paca()->stab_addr; + struct stab_entry *ste; unsigned long offset = __get_cpu_var(stab_cache_ptr); /* Force previous translations to complete. DRENG */ @@ -252,7 +253,7 @@ for (i = 0; i < offset; i++) { ste = stab + __get_cpu_var(stab_cache[i]); - ste->dw0.dw0.v = 0; + ste->esid_data = 0; /* invalidate entry */ } } else { unsigned long entry; @@ -263,12 +264,12 @@ /* Never flush the first entry. */ ste += 1; for (entry = 1; - entry < (PAGE_SIZE / sizeof(STE)); + entry < (PAGE_SIZE / sizeof(struct stab_entry)); entry++, ste++) { unsigned long ea; - ea = ste->dw0.dw0.esid << SID_SHIFT; + ea = ste->esid_data & ESID_MASK; if (ea < KERNELBASE) { - ste->dw0.dw0.v = 0; + ste->esid_data = 0; } } } Index: working-2.6/include/asm-ppc64/mmu.h =================================================================== --- working-2.6.orig/include/asm-ppc64/mmu.h 2004-07-29 16:14:41.040790952 +1000 +++ working-2.6/include/asm-ppc64/mmu.h 2004-07-29 16:14:46.643870904 +1000 @@ -37,33 +37,17 @@ mm_context_t ctx = { .id = REGION_ID(ea), KERNEL_LOW_HPAGES}; \ ctx; }) -typedef struct { - unsigned long esid: 36; /* Effective segment ID */ - unsigned long resv0:20; /* Reserved */ - unsigned long v: 1; /* Entry valid (v=1) or invalid */ - unsigned long resv1: 1; /* Reserved */ - unsigned long ks: 1; /* Supervisor (privileged) state storage key */ - unsigned long kp: 1; /* Problem state storage key */ - unsigned long n: 1; /* No-execute if n=1 */ - unsigned long resv2: 3; /* padding to a 64b boundary */ -} ste_dword0; - -typedef struct { - unsigned long vsid: 52; /* Virtual segment ID */ - unsigned long resv0:12; /* Padding to a 64b boundary */ -} ste_dword1; - -typedef struct _STE { - union { - unsigned long dword0; - ste_dword0 dw0; - } dw0; - - union { - unsigned long dword1; - ste_dword1 dw1; - } dw1; -} STE; 
+#define STE_ESID_V 0x80 +#define STE_ESID_KS 0x20 +#define STE_ESID_KP 0x10 +#define STE_ESID_N 0x08 + +#define STE_VSID_SHIFT 12 + +struct stab_entry { + unsigned long esid_data; + unsigned long vsid_data; +}; /* Hardware Page Table Entry */ -- David Gibson | For every complex problem there is a david AT gibson.dropbear.id.au | solution which is simple, neat and | wrong. http://www.ozlabs.org/people/dgibson ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From david at gibson.dropbear.id.au Tue Aug 3 12:14:09 2004 From: david at gibson.dropbear.id.au (David Gibson) Date: Tue, 3 Aug 2004 12:14:09 +1000 Subject: [3/5] STAB cleanup - assorted cleanups In-Reply-To: <20040803021347.GD3056@zax> References: <20040803021235.GB3056@zax> <20040803021328.GC3056@zax> <20040803021347.GD3056@zax> Message-ID: <20040803021409.GE3056@zax> Clean up various aspects of the PPC64 segment table management code: move code around to suit call order, remove redundant computations, and remove extra intermediate variables which don't really add to clarity. Signed-off-by: David Gibson Index: working-2.6/arch/ppc64/mm/stab.c =================================================================== --- working-2.6.orig/arch/ppc64/mm/stab.c +++ working-2.6/arch/ppc64/mm/stab.c @@ -20,51 +20,26 @@ #include #include -static int make_ste(unsigned long stab, unsigned long esid, - unsigned long vsid); - -void slb_initialize(void); - -/* - * Build an entry for the base kernel segment and put it into - * the segment table or SLB. All other segment table or SLB - * entries are faulted in. 
- */ -void stab_initialize(unsigned long stab) -{ - unsigned long vsid = get_kernel_vsid(KERNELBASE); - - if (cur_cpu_spec->cpu_features & CPU_FTR_SLB) { - slb_initialize(); - } else { - asm volatile("isync; slbia; isync":::"memory"); - make_ste(stab, GET_ESID(KERNELBASE), vsid); - - /* Order update */ - asm volatile("sync":::"memory"); - } -} - /* Both the segment table and SLB code uses the following cache */ #define NR_STAB_CACHE_ENTRIES 8 DEFINE_PER_CPU(long, stab_cache_ptr); DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]); /* - * Segment table stuff - */ - -/* * Create a segment table entry for the given esid/vsid pair. */ static int make_ste(unsigned long stab, unsigned long esid, unsigned long vsid) { + unsigned long esid_data, vsid_data; unsigned long entry, group, old_esid, castout_entry, i; unsigned int global_entry; struct stab_entry *ste, *castout_ste; - unsigned long kernel_segment = (REGION_ID(esid << SID_SHIFT) != - USER_REGION_ID); - unsigned long esid_data; + unsigned long kernel_segment = (esid << SID_SHIFT) >= KERNELBASE; + + vsid_data = vsid << STE_VSID_SHIFT; + esid_data = esid << SID_SHIFT | STE_ESID_KP | STE_ESID_V; + if (! kernel_segment) + esid_data |= STE_ESID_KS; /* Search the primary group first. */ global_entry = (esid & 0x1f) << 3; @@ -74,12 +49,8 @@ for (group = 0; group < 2; group++) { for (entry = 0; entry < 8; entry++, ste++) { if (!(ste->esid_data & STE_ESID_V)) { - ste->vsid_data = vsid << STE_VSID_SHIFT; + ste->vsid_data = vsid_data; asm volatile("eieio":::"memory"); - esid_data = esid << SID_SHIFT; - esid_data |= STE_ESID_KP | STE_ESID_V; - if (! 
kernel_segment) - esid_data |= STE_ESID_KS; ste->esid_data = esid_data; return (global_entry | entry); } @@ -124,14 +95,8 @@ asm volatile("sync" : : : "memory"); /* Order update */ - castout_ste->vsid_data = vsid << STE_VSID_SHIFT; - + castout_ste->vsid_data = vsid_data; asm volatile("eieio" : : : "memory"); /* Order update */ - - esid_data = esid << SID_SHIFT; - esid_data |= STE_ESID_KP | STE_ESID_V; - if (!kernel_segment) - esid_data |= STE_ESID_KS; castout_ste->esid_data = esid_data; asm volatile("slbie %0" : : "r" (old_esid << SID_SHIFT)); @@ -145,11 +110,10 @@ { unsigned char stab_entry; unsigned long offset; - int region_id = REGION_ID(esid << SID_SHIFT); stab_entry = make_ste(get_paca()->stab_addr, esid, vsid); - if (region_id != USER_REGION_ID) + if ((esid << SID_SHIFT) >= KERNELBASE) return; offset = __get_cpu_var(stab_cache_ptr); @@ -165,27 +129,23 @@ */ int ste_allocate(unsigned long ea) { - unsigned long vsid, esid; - mm_context_t context; + unsigned long vsid; /* Check for invalid effective addresses. */ if (!IS_VALID_EA(ea)) return 1; /* Kernel or user address? 
*/ - if (REGION_ID(ea) >= KERNEL_REGION_ID) { + if (ea >= KERNELBASE) { vsid = get_kernel_vsid(ea); - context = KERNEL_CONTEXT(ea); } else { if (!current->mm) return 1; - context = current->mm->context; - vsid = get_vsid(context.id, ea); + vsid = get_vsid(current->mm->context.id, ea); } - esid = GET_ESID(ea); - __ste_allocate(esid, vsid); + __ste_allocate(GET_ESID(ea), vsid); /* Order update */ asm volatile("sync":::"memory"); @@ -200,39 +160,34 @@ unsigned long pc = KSTK_EIP(tsk); unsigned long stack = KSTK_ESP(tsk); unsigned long unmapped_base; - unsigned long pc_esid = GET_ESID(pc); - unsigned long stack_esid = GET_ESID(stack); - unsigned long unmapped_base_esid; unsigned long vsid; if (test_tsk_thread_flag(tsk, TIF_32BIT)) unmapped_base = TASK_UNMAPPED_BASE_USER32; else unmapped_base = TASK_UNMAPPED_BASE_USER64; - - unmapped_base_esid = GET_ESID(unmapped_base); - - if (!IS_VALID_EA(pc) || (REGION_ID(pc) >= KERNEL_REGION_ID)) + + if (!IS_VALID_EA(pc) || (pc >= KERNELBASE)) return; vsid = get_vsid(mm->context.id, pc); - __ste_allocate(pc_esid, vsid); - - if (pc_esid == stack_esid) + __ste_allocate(GET_ESID(pc), vsid); + + if (GET_ESID(pc) == GET_ESID(stack)) return; - - if (!IS_VALID_EA(stack) || (REGION_ID(stack) >= KERNEL_REGION_ID)) + + if (!IS_VALID_EA(stack) || (stack >= KERNELBASE)) return; vsid = get_vsid(mm->context.id, stack); - __ste_allocate(stack_esid, vsid); - - if (pc_esid == unmapped_base_esid || stack_esid == unmapped_base_esid) + __ste_allocate(GET_ESID(stack), vsid); + + if ((GET_ESID(pc) == GET_ESID(unmapped_base)) + || (GET_ESID(stack) == GET_ESID(unmapped_base))) return; - - if (!IS_VALID_EA(unmapped_base) || - (REGION_ID(unmapped_base) >= KERNEL_REGION_ID)) + + if (!IS_VALID_EA(unmapped_base) || (unmapped_base >= KERNELBASE)) return; vsid = get_vsid(mm->context.id, unmapped_base); - __ste_allocate(unmapped_base_esid, vsid); + __ste_allocate(GET_ESID(unmapped_base), vsid); /* Order update */ asm volatile("sync" : : : "memory"); @@ -280,3 
+235,25 @@ preload_stab(tsk, mm); } + +extern void slb_initialize(void); + +/* + * Build an entry for the base kernel segment and put it into + * the segment table or SLB. All other segment table or SLB + * entries are faulted in. + */ +void stab_initialize(unsigned long stab) +{ + unsigned long vsid = get_kernel_vsid(KERNELBASE); + + if (cur_cpu_spec->cpu_features & CPU_FTR_SLB) { + slb_initialize(); + } else { + asm volatile("isync; slbia; isync":::"memory"); + make_ste(stab, GET_ESID(KERNELBASE), vsid); + + /* Order update */ + asm volatile("sync":::"memory"); + } +} -- David Gibson | For every complex problem there is a david AT gibson.dropbear.id.au | solution which is simple, neat and | wrong. http://www.ozlabs.org/people/dgibson ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From david at gibson.dropbear.id.au Tue Aug 3 12:15:24 2004 From: david at gibson.dropbear.id.au (David Gibson) Date: Tue, 3 Aug 2004 12:15:24 +1000 Subject: [4/5] STAB cleanup - remove check duplication In-Reply-To: <20040803021409.GE3056@zax> References: <20040803021235.GB3056@zax> <20040803021328.GC3056@zax> <20040803021347.GD3056@zax> <20040803021409.GE3056@zax> Message-ID: <20040803021524.GF3056@zax> Rearrange the ste_allocate()/__ste_allocate() path in the PPC64 segment table code more sensibly. This moves various valid address checks into the lower-level __ste_allocate(), meaning the checks don't need to be duplicated in preload_stab(). 
Signed-off-by: David Gibson Index: working-2.6/arch/ppc64/mm/stab.c =================================================================== --- working-2.6.orig/arch/ppc64/mm/stab.c 2004-07-30 13:17:34.828858504 +1000 +++ working-2.6/arch/ppc64/mm/stab.c 2004-07-30 13:39:57.338854248 +1000 @@ -106,30 +106,14 @@ return (global_entry | (castout_entry & 0x7)); } -static inline void __ste_allocate(unsigned long esid, unsigned long vsid) -{ - unsigned char stab_entry; - unsigned long offset; - - stab_entry = make_ste(get_paca()->stab_addr, esid, vsid); - - if ((esid << SID_SHIFT) >= KERNELBASE) - return; - - offset = __get_cpu_var(stab_cache_ptr); - if (offset < NR_STAB_CACHE_ENTRIES) - __get_cpu_var(stab_cache[offset++]) = stab_entry; - else - offset = NR_STAB_CACHE_ENTRIES+1; - __get_cpu_var(stab_cache_ptr) = offset; -} - /* - * Allocate a segment table entry for the given ea. + * Allocate a segment table entry for the given ea and mm */ -int ste_allocate(unsigned long ea) +static int __ste_allocate(unsigned long ea, struct mm_struct *mm) { unsigned long vsid; + unsigned char stab_entry; + unsigned long offset; /* Check for invalid effective addresses. */ if (!IS_VALID_EA(ea)) @@ -139,19 +123,34 @@ if (ea >= KERNELBASE) { vsid = get_kernel_vsid(ea); } else { - if (!current->mm) + if (! 
mm) return 1; - vsid = get_vsid(current->mm->context.id, ea); + vsid = get_vsid(mm->context.id, ea); } - __ste_allocate(GET_ESID(ea), vsid); - /* Order update */ - asm volatile("sync":::"memory"); + stab_entry = make_ste(get_paca()->stab_addr, GET_ESID(ea), vsid); + + if (ea < KERNELBASE) { + offset = __get_cpu_var(stab_cache_ptr); + if (offset < NR_STAB_CACHE_ENTRIES) + __get_cpu_var(stab_cache[offset++]) = stab_entry; + else + offset = NR_STAB_CACHE_ENTRIES+1; + __get_cpu_var(stab_cache_ptr) = offset; + + /* Order update */ + asm volatile("sync":::"memory"); + } return 0; } +int ste_allocate(unsigned long ea) +{ + return __ste_allocate(ea, current->mm); +} + /* * preload some userspace segments into the segment table. */ @@ -160,34 +159,24 @@ unsigned long pc = KSTK_EIP(tsk); unsigned long stack = KSTK_ESP(tsk); unsigned long unmapped_base; - unsigned long vsid; if (test_tsk_thread_flag(tsk, TIF_32BIT)) unmapped_base = TASK_UNMAPPED_BASE_USER32; else unmapped_base = TASK_UNMAPPED_BASE_USER64; - if (!IS_VALID_EA(pc) || (pc >= KERNELBASE)) - return; - vsid = get_vsid(mm->context.id, pc); - __ste_allocate(GET_ESID(pc), vsid); + __ste_allocate(pc, mm); if (GET_ESID(pc) == GET_ESID(stack)) return; - if (!IS_VALID_EA(stack) || (stack >= KERNELBASE)) - return; - vsid = get_vsid(mm->context.id, stack); - __ste_allocate(GET_ESID(stack), vsid); + __ste_allocate(stack, mm); if ((GET_ESID(pc) == GET_ESID(unmapped_base)) || (GET_ESID(stack) == GET_ESID(unmapped_base))) return; - if (!IS_VALID_EA(unmapped_base) || (unmapped_base >= KERNELBASE)) - return; - vsid = get_vsid(mm->context.id, unmapped_base); - __ste_allocate(GET_ESID(unmapped_base), vsid); + __ste_allocate(unmapped_base, mm); /* Order update */ asm volatile("sync" : : : "memory"); -- David Gibson | For every complex problem there is a david AT gibson.dropbear.id.au | solution which is simple, neat and | wrong. http://www.ozlabs.org/people/dgibson ** Sent via the linuxppc64-dev mail list. 
See http://lists.linuxppc.org/ From david at gibson.dropbear.id.au Tue Aug 3 12:15:57 2004 From: david at gibson.dropbear.id.au (David Gibson) Date: Tue, 3 Aug 2004 12:15:57 +1000 Subject: [5/5] STAB cleanup - replace flush_stab() with switch_stab() In-Reply-To: <20040803021524.GF3056@zax> References: <20040803021235.GB3056@zax> <20040803021328.GC3056@zax> <20040803021347.GD3056@zax> <20040803021409.GE3056@zax> <20040803021524.GF3056@zax> Message-ID: <20040803021557.GG3056@zax> preload_stab() is only ever called (once) from flush_stab(), and flush_stab() is only ever called from switch_mm(). So, combine both functions into the more accurately named switch_stab(), called from switch_mm(). Signed-off-by: David Gibson Index: working-2.6/include/asm-ppc64/mmu_context.h =================================================================== --- working-2.6.orig/include/asm-ppc64/mmu_context.h 2004-07-30 14:13:32.932890776 +1000 +++ working-2.6/include/asm-ppc64/mmu_context.h 2004-07-30 14:13:35.017837008 +1000 @@ -135,7 +135,7 @@ spin_unlock_irqrestore(&mmu_context_queue.lock, flags); } -extern void flush_stab(struct task_struct *tsk, struct mm_struct *mm); +extern void switch_stab(struct task_struct *tsk, struct mm_struct *mm); extern void switch_slb(struct task_struct *tsk, struct mm_struct *mm); /* @@ -163,7 +163,7 @@ if (cur_cpu_spec->cpu_features & CPU_FTR_SLB) switch_slb(tsk, next); else - flush_stab(tsk, next); + switch_stab(tsk, next); } #define deactivate_mm(tsk,mm) do { } while (0) Index: working-2.6/arch/ppc64/mm/stab.c =================================================================== --- working-2.6.orig/arch/ppc64/mm/stab.c 2004-07-30 14:13:34.968844456 +1000 +++ working-2.6/arch/ppc64/mm/stab.c 2004-07-30 14:13:55.314811992 +1000 @@ -152,42 +148,18 @@ } /* - * preload some userspace segments into the segment table. 
+ * Do the segment table work for a context switch: flush all user + * entries from the table, then preload some probably useful entries + * for the new task */ -static void preload_stab(struct task_struct *tsk, struct mm_struct *mm) -{ - unsigned long pc = KSTK_EIP(tsk); - unsigned long stack = KSTK_ESP(tsk); - unsigned long unmapped_base; - - if (test_tsk_thread_flag(tsk, TIF_32BIT)) - unmapped_base = TASK_UNMAPPED_BASE_USER32; - else - unmapped_base = TASK_UNMAPPED_BASE_USER64; - - __ste_allocate(pc, mm); - - if (GET_ESID(pc) == GET_ESID(stack)) - return; - - __ste_allocate(stack, mm); - - if ((GET_ESID(pc) == GET_ESID(unmapped_base)) - || (GET_ESID(stack) == GET_ESID(unmapped_base))) - return; - - __ste_allocate(unmapped_base, mm); - - /* Order update */ - asm volatile("sync" : : : "memory"); -} - -/* Flush all user entries from the segment table of the current processor. */ -void flush_stab(struct task_struct *tsk, struct mm_struct *mm) +void switch_stab(struct task_struct *tsk, struct mm_struct *mm) { struct stab_entry *stab = (struct stab_entry *) get_paca()->stab_addr; struct stab_entry *ste; unsigned long offset = __get_cpu_var(stab_cache_ptr); + unsigned long pc = KSTK_EIP(tsk); + unsigned long stack = KSTK_ESP(tsk); + unsigned long unmapped_base; /* Force previous translations to complete. 
DRENG */ asm volatile("isync" : : : "memory"); @@ -222,7 +194,27 @@ __get_cpu_var(stab_cache_ptr) = 0; - preload_stab(tsk, mm); + /* Now preload some entries for the new task */ + if (test_tsk_thread_flag(tsk, TIF_32BIT)) + unmapped_base = TASK_UNMAPPED_BASE_USER32; + else + unmapped_base = TASK_UNMAPPED_BASE_USER64; + + __ste_allocate(pc, mm); + + if (GET_ESID(pc) == GET_ESID(stack)) + return; + + __ste_allocate(stack, mm); + + if ((GET_ESID(pc) == GET_ESID(unmapped_base)) + || (GET_ESID(stack) == GET_ESID(unmapped_base))) + return; + + __ste_allocate(unmapped_base, mm); + + /* Order update */ + asm volatile("sync" : : : "memory"); } extern void slb_initialize(void); -- David Gibson | For every complex problem there is a david AT gibson.dropbear.id.au | solution which is simple, neat and | wrong. http://www.ozlabs.org/people/dgibson ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From haveblue at us.ibm.com Tue Aug 3 18:21:41 2004 From: haveblue at us.ibm.com (Dave Hansen) Date: Tue, 03 Aug 2004 01:21:41 -0700 Subject: [PATCH] suppress 'store_purr' unused warning Message-ID: <1091521301.27397.3660.camel@nighthawk> Some new sysfs macros declare a store_purr() function that never gets used, which triggers a compiler warning. Suppress the warning with an unused attribute. Can you tell that I'm using compiler warnings heavily in my CONFIG_NONLINEAR debugging? :) -- Dave -------------- next part -------------- A non-text attachment was scrubbed... 
Name: A3-ppc64-store_purr_unused.patch Type: text/x-patch Size: 842 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20040803/2a3c317e/attachment.bin From sharada at in.ibm.com Tue Aug 3 23:15:27 2004 From: sharada at in.ibm.com (R Sharada) Date: Tue, 3 Aug 2004 18:45:27 +0530 Subject: cpumask move patch - RFC Message-ID: <20040803131527.GA4056@in.ibm.com> Hello, This is part of an attempt to clean up some of the kernel data structure initialisations from prom.c and move to later boot code. This patch moves the cpumask data structure initialisations from prom_hold_cpus() to setup_system(). The patch is against the 2.6.8-rc2 linus bitkeeper tree. Kindly request review and comments on this patch. Thanks and Regards, Sharada -------------- next part -------------- diff -Naur linux-2.6.8-rc2-org/arch/ppc64/kernel/chrp_setup.c linux-2.6.8-rc2-chg/arch/ppc64/kernel/chrp_setup.c --- linux-2.6.8-rc2-org/arch/ppc64/kernel/chrp_setup.c 2004-08-03 02:12:58.000000000 -0700 +++ linux-2.6.8-rc2-chg/arch/ppc64/kernel/chrp_setup.c 2004-08-04 07:15:25.747997352 -0700 @@ -77,6 +77,8 @@ void pSeries_calibrate_decr(void); void fwnmi_init(void); extern void SystemReset_FWNMI(void), MachineCheck_FWNMI(void); /* from head.S */ +void cpumask_setup(void); + int fwnmi_active; /* TRUE if an FWNMI handler is present */ dev_t boot_dev; @@ -468,3 +470,91 @@ setup_default_decr(); } + +void cpumask_setup() +{ + unsigned long ind; + struct device_node *np = NULL; + int cpuid = 0; + unsigned int *reg = NULL; + char *statusp = NULL; + int *propsize = NULL; + unsigned int cpu_threads; + + printk(KERN_INFO "cpumask_setup\n"); + /* On pmac, we just fill out the various global bitmasks and + * arrays indicating our CPUs are here, they are actually started + * later on from pmac_smp + */ + if (systemcfg->platform == PLATFORM_POWERMAC) { + while ((np = of_find_node_by_type(np, "cpu"))) { + reg = (unsigned int *)get_property(np, "reg", NULL); +#ifdef CONFIG_SMP + 
cpu_set(cpuid, cpu_available_map); + cpu_set(cpuid, cpu_possible_map); + cpu_set(cpuid, cpu_present_at_boot); + if (*reg == 0) + cpu_set(cpuid, cpu_online_map); +#endif /* CONFIG_SMP */ + cpuid++; + } + of_node_put(np); + return; + } + + while ((np = of_find_node_by_type(np, "cpu"))) { + + statusp = (char *)get_property(np, "status", NULL); + if (strcmp(statusp, "okay") != 0) + continue; + + reg = (unsigned int *)get_property(np, "reg", NULL); + + propsize = (int *)get_property(np, "ibm,ppc-interrupt-server#s", NULL); + if (*propsize < 0) { + /* no property. old hardware has no SMT */ + cpu_threads = 1; + } else { + /* We have a threaded processor */ + cpu_threads = *propsize / sizeof(u32); + if (cpu_threads > 2) + cpu_threads = 1; /* ToDo: panic? */ + } + +#ifdef CONFIG_SMP + cpu_set(cpuid, cpu_available_map); + cpu_set(cpuid, cpu_possible_map); + cpu_set(cpuid, cpu_present_at_boot); + if (cpuid == boot_cpuid) + cpu_set(cpuid, cpu_online_map); + + /* set the secondary threads into the cpuid mask */ + for (ind=1; ind < cpu_threads; ind++) { + cpuid++; + if (cpuid >= NR_CPUS) + continue; + if (naca->smt_state) { + cpu_set(cpuid, cpu_available_map); + cpu_set(cpuid, cpu_present_at_boot); + } + } +#endif + cpuid++; + } + of_node_put(np); + +#ifdef CONFIG_HMT + /* Only enable HMT on processors that provide support. 
*/ + if (__is_processor(PV_PULSAR) || + __is_processor(PV_ICESTAR) || + __is_processor(PV_SSTAR)) { + + for (ind = 0; ind < NR_CPUS; ind += 2) { + if (!cpu_online(ind)) + continue; + cpu_set(ind+1, cpu_possible_map); + } + } +#endif + return; +} diff -Naur linux-2.6.8-rc2-org/arch/ppc64/kernel/prom.c linux-2.6.8-rc2-chg/arch/ppc64/kernel/prom.c --- linux-2.6.8-rc2-org/arch/ppc64/kernel/prom.c 2004-08-04 06:10:30.000000000 -0700 +++ linux-2.6.8-rc2-chg/arch/ppc64/kernel/prom.c 2004-08-04 06:12:56.000000000 -0700 @@ -939,13 +939,6 @@ prom_getprop(node, "reg", &reg, sizeof(reg)); lpaca[cpuid].hw_cpu_id = reg; -#ifdef CONFIG_SMP - cpu_set(cpuid, RELOC(cpu_available_map)); - cpu_set(cpuid, RELOC(cpu_possible_map)); - cpu_set(cpuid, RELOC(cpu_present_at_boot)); - if (reg == 0) - cpu_set(cpuid, RELOC(cpu_online_map)); -#endif /* CONFIG_SMP */ cpuid++; } return; @@ -1042,9 +1035,6 @@ #ifdef CONFIG_SMP /* Set the number of active processors. */ _systemcfg->processorCount++; - cpu_set(cpuid, RELOC(cpu_available_map)); - cpu_set(cpuid, RELOC(cpu_possible_map)); - cpu_set(cpuid, RELOC(cpu_present_at_boot)); #endif } else { prom_printf("... failed: %x\n", *acknowledge); @@ -1053,10 +1043,6 @@ #ifdef CONFIG_SMP else { prom_printf("%x : booting cpu %s\n", cpuid, path); - cpu_set(cpuid, RELOC(cpu_available_map)); - cpu_set(cpuid, RELOC(cpu_possible_map)); - cpu_set(cpuid, RELOC(cpu_online_map)); - cpu_set(cpuid, RELOC(cpu_present_at_boot)); } #endif next: @@ -1070,8 +1056,6 @@ prom_printf("%x : preparing thread ... 
", interrupt_server[i]); if (_naca->smt_state) { - cpu_set(cpuid, RELOC(cpu_available_map)); - cpu_set(cpuid, RELOC(cpu_present_at_boot)); prom_printf("available\n"); } else { prom_printf("not available\n"); @@ -1102,7 +1086,6 @@ } } /* cpu_set(i+1, cpu_online_map); */ - cpu_set(i+1, RELOC(cpu_possible_map)); } _systemcfg->processorCount *= 2; } else { diff -Naur linux-2.6.8-rc2-org/arch/ppc64/kernel/setup.c linux-2.6.8-rc2-chg/arch/ppc64/kernel/setup.c --- linux-2.6.8-rc2-org/arch/ppc64/kernel/setup.c 2004-08-03 02:12:59.000000000 -0700 +++ linux-2.6.8-rc2-chg/arch/ppc64/kernel/setup.c 2004-08-04 06:15:27.000000000 -0700 @@ -76,6 +76,7 @@ extern void pseries_secondary_smp_init(unsigned long); extern int idle_setup(void); extern void vpa_init(int cpu); +extern void cpumask_setup(void); unsigned long decr_overclock = 1; unsigned long decr_overclock_proc0 = 1; @@ -229,6 +230,7 @@ register_console(&udbg_console); __irq_offset_value = NUM_ISA_INTERRUPTS; finish_device_tree(); + cpumask_setup(); chrp_init(r3, r4, r5, r6, r7); #ifdef CONFIG_SMP @@ -251,6 +253,7 @@ #ifdef CONFIG_PPC_PMAC if (systemcfg->platform == PLATFORM_POWERMAC) { finish_device_tree(); + cpumask_setup(); pmac_init(r3, r4, r5, r6, r7); } #endif /* CONFIG_PPC_PMAC */ From nathanl at austin.ibm.com Wed Aug 4 09:18:28 2004 From: nathanl at austin.ibm.com (nathanl at austin.ibm.com) Date: Tue, 3 Aug 2004 18:18:28 -0500 Subject: [patch 1/4] Use platform numbering of cpus for hypervisor calls Message-ID: <200408032317.i73NHksP073408@austin.ibm.com> We were using Linux's cpu numbering for cpu-related hypervisor calls (e.g. vpa registration, H_CONFER). It happened to work most of the time because Linux and the hypervisor usually, but not always, have the same numbering for cpus. 
Signed-off-by: Nathan Lynch --- diff -puN arch/ppc64/kernel/smp.c~ppc64_fix_hcall_cpuids arch/ppc64/kernel/smp.c --- 2.6-tip/arch/ppc64/kernel/smp.c~ppc64_fix_hcall_cpuids 2004-08-03 18:06:53.000000000 -0500 +++ 2.6-tip-nathanl/arch/ppc64/kernel/smp.c 2004-08-03 18:06:53.000000000 -0500 @@ -487,11 +487,11 @@ static void __init smp_space_timers(unsi #ifdef CONFIG_PPC_PSERIES void vpa_init(int cpu) { - unsigned long flags; + unsigned long flags, pcpu = get_hard_smp_processor_id(cpu); /* Register the Virtual Processor Area (VPA) */ flags = 1UL << (63 - 18); - register_vpa(flags, cpu, __pa((unsigned long)&(paca[cpu].lppaca))); + register_vpa(flags, pcpu, __pa((unsigned long)&(paca[cpu].lppaca))); } static inline void smp_xics_do_message(int cpu, int msg) diff -puN arch/ppc64/lib/locks.c~ppc64_fix_hcall_cpuids arch/ppc64/lib/locks.c --- 2.6-tip/arch/ppc64/lib/locks.c~ppc64_fix_hcall_cpuids 2004-08-03 18:06:53.000000000 -0500 +++ 2.6-tip-nathanl/arch/ppc64/lib/locks.c 2004-08-03 18:06:53.000000000 -0500 @@ -63,7 +63,8 @@ void __spin_yield(spinlock_t *lock) HvCall2(HvCallBaseYieldProcessor, HvCall_YieldToProc, ((u64)holder_cpu << 32) | yield_count); #else - plpar_hcall_norets(H_CONFER, holder_cpu, yield_count); + plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(holder_cpu), + yield_count); #endif } @@ -179,7 +180,8 @@ void __rw_yield(rwlock_t *rw) HvCall2(HvCallBaseYieldProcessor, HvCall_YieldToProc, ((u64)holder_cpu << 32) | yield_count); #else - plpar_hcall_norets(H_CONFER, holder_cpu, yield_count); + plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(holder_cpu), + yield_count); #endif } _ ** Sent via the linuxppc64-dev mail list. 
See http://lists.linuxppc.org/ From nathanl at austin.ibm.com Wed Aug 4 09:18:34 2004 From: nathanl at austin.ibm.com (nathanl at austin.ibm.com) Date: Tue, 3 Aug 2004 18:18:34 -0500 Subject: [patch 2/4] Use cpu_present_map in ppc64 Message-ID: <200408032317.i73NHqsP079046@austin.ibm.com> Adopt the "standard" cpu_present_map for describing cpus which are present in the system, but not necessarily online. cpu_present_map is meant to be a superset of cpu_online_map and a subset of cpu_possible_map. Signed-off-by: Nathan Lynch --- diff -puN arch/ppc64/kernel/prom.c~ppc64-add-cpu_present_map arch/ppc64/kernel/prom.c --- 2.6-tip/arch/ppc64/kernel/prom.c~ppc64-add-cpu_present_map 2004-08-03 18:07:06.000000000 -0500 +++ 2.6-tip-nathanl/arch/ppc64/kernel/prom.c 2004-08-03 18:07:06.000000000 -0500 @@ -943,6 +943,7 @@ static void __init prom_hold_cpus(unsign cpu_set(cpuid, RELOC(cpu_available_map)); cpu_set(cpuid, RELOC(cpu_possible_map)); cpu_set(cpuid, RELOC(cpu_present_at_boot)); + cpu_set(cpuid, RELOC(cpu_present_map)); if (reg == 0) cpu_set(cpuid, RELOC(cpu_online_map)); #endif /* CONFIG_SMP */ @@ -1045,6 +1046,7 @@ static void __init prom_hold_cpus(unsign cpu_set(cpuid, RELOC(cpu_available_map)); cpu_set(cpuid, RELOC(cpu_possible_map)); cpu_set(cpuid, RELOC(cpu_present_at_boot)); + cpu_set(cpuid, RELOC(cpu_present_map)); #endif } else { prom_printf("... 
failed: %x\n", *acknowledge); @@ -1057,6 +1059,7 @@ static void __init prom_hold_cpus(unsign cpu_set(cpuid, RELOC(cpu_possible_map)); cpu_set(cpuid, RELOC(cpu_online_map)); cpu_set(cpuid, RELOC(cpu_present_at_boot)); + cpu_set(cpuid, RELOC(cpu_present_map)); } #endif next: @@ -1072,6 +1075,7 @@ next: if (_naca->smt_state) { cpu_set(cpuid, RELOC(cpu_available_map)); cpu_set(cpuid, RELOC(cpu_present_at_boot)); + cpu_set(cpuid, RELOC(cpu_present_map)); prom_printf("available\n"); } else { prom_printf("not available\n"); @@ -1103,6 +1107,7 @@ next: } /* cpu_set(i+1, cpu_online_map); */ cpu_set(i+1, RELOC(cpu_possible_map)); + cpu_set(i+1, RELOC(cpu_present_map)); } _systemcfg->processorCount *= 2; } else { diff -puN arch/ppc64/kernel/smp.c~ppc64-add-cpu_present_map arch/ppc64/kernel/smp.c --- 2.6-tip/arch/ppc64/kernel/smp.c~ppc64-add-cpu_present_map 2004-08-03 18:07:06.000000000 -0500 +++ 2.6-tip-nathanl/arch/ppc64/kernel/smp.c 2004-08-03 18:07:06.000000000 -0500 @@ -127,6 +127,7 @@ static int smp_iSeries_numProcs(void) cpu_set(i, cpu_available_map); cpu_set(i, cpu_possible_map); cpu_set(i, cpu_present_at_boot); + cpu_set(i, cpu_present_map); ++np; } } _ ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From nathanl at austin.ibm.com Wed Aug 4 09:18:40 2004 From: nathanl at austin.ibm.com (nathanl at austin.ibm.com) Date: Tue, 3 Aug 2004 18:18:40 -0500 Subject: [patch 3/4] Rework secondary SMT thread setup at boot Message-ID: <200408032317.i73NHwsP080586@austin.ibm.com> Our (ab)use of cpu_possible_map in setup_system to start secondary SMT threads bothers me. Mark such threads in cpu_possible_map during early boot; let RTAS tell us which present cpus are still offline later so we can start them. Also change query_cpu_stopped to not BUG when the RTAS query-cpu-stopped-state token is not available. 
Signed-off-by: Nathan Lynch --- diff -puN arch/ppc64/kernel/prom.c~ppc64-fix-secondary-smt-thread-setup arch/ppc64/kernel/prom.c --- 2.6-tip/arch/ppc64/kernel/prom.c~ppc64-fix-secondary-smt-thread-setup 2004-08-03 18:07:13.000000000 -0500 +++ 2.6-tip-nathanl/arch/ppc64/kernel/prom.c 2004-08-03 18:07:13.000000000 -0500 @@ -1076,6 +1076,8 @@ next: cpu_set(cpuid, RELOC(cpu_available_map)); cpu_set(cpuid, RELOC(cpu_present_at_boot)); cpu_set(cpuid, RELOC(cpu_present_map)); + cpu_set(cpuid, RELOC(cpu_possible_map)); + _systemcfg->processorCount++; prom_printf("available\n"); } else { prom_printf("not available\n"); diff -puN arch/ppc64/kernel/setup.c~ppc64-fix-secondary-smt-thread-setup arch/ppc64/kernel/setup.c --- 2.6-tip/arch/ppc64/kernel/setup.c~ppc64-fix-secondary-smt-thread-setup 2004-08-03 18:07:13.000000000 -0500 +++ 2.6-tip-nathanl/arch/ppc64/kernel/setup.c 2004-08-03 18:07:13.000000000 -0500 @@ -232,16 +232,17 @@ void setup_system(unsigned long r3, unsi chrp_init(r3, r4, r5, r6, r7); #ifdef CONFIG_SMP - /* Start secondary threads on SMT systems */ - for (i = 0; i < NR_CPUS; i++) { - if (cpu_available(i) && !cpu_possible(i)) { + /* Start secondary threads on SMT systems; primary threads + * are already in the running state. 
+ */ + for_each_present_cpu(i) { + if (query_cpu_stopped + (get_hard_smp_processor_id(i)) == 0) { printk("%16.16x : starting thread\n", i); rtas_call(rtas_token("start-cpu"), 3, 1, &ret, get_hard_smp_processor_id(i), (u32)*((unsigned long *)pseries_secondary_smp_init), i); - cpu_set(i, cpu_possible_map); - systemcfg->processorCount++; } } #endif /* CONFIG_SMP */ diff -puN arch/ppc64/kernel/smp.c~ppc64-fix-secondary-smt-thread-setup arch/ppc64/kernel/smp.c --- 2.6-tip/arch/ppc64/kernel/smp.c~ppc64-fix-secondary-smt-thread-setup 2004-08-03 18:07:13.000000000 -0500 +++ 2.6-tip-nathanl/arch/ppc64/kernel/smp.c 2004-08-03 18:07:13.000000000 -0500 @@ -225,7 +225,6 @@ static void __devinit smp_openpic_setup_ do_openpic_setup_cpu(); } -#ifdef CONFIG_HOTPLUG_CPU /* Get state of physical CPU. * Return codes: * 0 - The processor is in the RTAS stopped state @@ -234,13 +233,14 @@ static void __devinit smp_openpic_setup_ * -1 - Hardware Error * -2 - Hardware Busy, Try again later. */ -static int query_cpu_stopped(unsigned int pcpu) +int query_cpu_stopped(unsigned int pcpu) { int cpu_status; int status, qcss_tok; qcss_tok = rtas_token("query-cpu-stopped-state"); - BUG_ON(qcss_tok == RTAS_UNKNOWN_SERVICE); + if (qcss_tok == RTAS_UNKNOWN_SERVICE) + return -1; status = rtas_call(qcss_tok, 1, 2, &cpu_status, pcpu); if (status != 0) { printk(KERN_ERR @@ -251,6 +251,8 @@ static int query_cpu_stopped(unsigned in return cpu_status; } +#ifdef CONFIG_HOTPLUG_CPU + int __cpu_disable(void) { /* FIXME: go put this in a header somewhere */ diff -puN include/asm-ppc64/smp.h~ppc64-fix-secondary-smt-thread-setup include/asm-ppc64/smp.h --- 2.6-tip/include/asm-ppc64/smp.h~ppc64-fix-secondary-smt-thread-setup 2004-08-03 18:07:13.000000000 -0500 +++ 2.6-tip-nathanl/include/asm-ppc64/smp.h 2004-08-03 18:07:13.000000000 -0500 @@ -73,6 +73,7 @@ void smp_init_pSeries(void); extern int __cpu_disable(void); extern void __cpu_die(unsigned int cpu); extern void cpu_die(void) __attribute__((noreturn)); 
+extern int query_cpu_stopped(unsigned int pcpu); #endif /* !(CONFIG_SMP) */ #define get_hard_smp_processor_id(CPU) (paca[(CPU)].hw_cpu_id) _ ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From nathanl at austin.ibm.com Wed Aug 4 09:18:46 2004 From: nathanl at austin.ibm.com (nathanl at austin.ibm.com) Date: Tue, 3 Aug 2004 18:18:46 -0500 Subject: [patch 4/4] Remove unnecessary cpu maps (available, present_at_boot) Message-ID: <200408032318.i73NI4sP072766@austin.ibm.com> With cpu_present_map, we don't need these any longer. Signed-off-by: Nathan Lynch --- diff -puN arch/ppc64/kernel/prom.c~ppc64-remove-unnecessary-cpu-maps arch/ppc64/kernel/prom.c --- 2.6-tip/arch/ppc64/kernel/prom.c~ppc64-remove-unnecessary-cpu-maps 2004-08-03 18:07:20.000000000 -0500 +++ 2.6-tip-nathanl/arch/ppc64/kernel/prom.c 2004-08-03 18:07:20.000000000 -0500 @@ -940,9 +940,7 @@ static void __init prom_hold_cpus(unsign lpaca[cpuid].hw_cpu_id = reg; #ifdef CONFIG_SMP - cpu_set(cpuid, RELOC(cpu_available_map)); cpu_set(cpuid, RELOC(cpu_possible_map)); - cpu_set(cpuid, RELOC(cpu_present_at_boot)); cpu_set(cpuid, RELOC(cpu_present_map)); if (reg == 0) cpu_set(cpuid, RELOC(cpu_online_map)); @@ -1043,9 +1041,7 @@ static void __init prom_hold_cpus(unsign #ifdef CONFIG_SMP /* Set the number of active processors. */ _systemcfg->processorCount++; - cpu_set(cpuid, RELOC(cpu_available_map)); cpu_set(cpuid, RELOC(cpu_possible_map)); - cpu_set(cpuid, RELOC(cpu_present_at_boot)); cpu_set(cpuid, RELOC(cpu_present_map)); #endif } else { @@ -1055,10 +1051,8 @@ static void __init prom_hold_cpus(unsign #ifdef CONFIG_SMP else { prom_printf("%x : booting cpu %s\n", cpuid, path); - cpu_set(cpuid, RELOC(cpu_available_map)); cpu_set(cpuid, RELOC(cpu_possible_map)); cpu_set(cpuid, RELOC(cpu_online_map)); - cpu_set(cpuid, RELOC(cpu_present_at_boot)); cpu_set(cpuid, RELOC(cpu_present_map)); } #endif @@ -1073,8 +1067,6 @@ next: prom_printf("%x : preparing thread ... 
", interrupt_server[i]); if (_naca->smt_state) { - cpu_set(cpuid, RELOC(cpu_available_map)); - cpu_set(cpuid, RELOC(cpu_present_at_boot)); cpu_set(cpuid, RELOC(cpu_present_map)); cpu_set(cpuid, RELOC(cpu_possible_map)); _systemcfg->processorCount++; diff -puN arch/ppc64/kernel/smp.c~ppc64-remove-unnecessary-cpu-maps arch/ppc64/kernel/smp.c --- 2.6-tip/arch/ppc64/kernel/smp.c~ppc64-remove-unnecessary-cpu-maps 2004-08-03 18:07:20.000000000 -0500 +++ 2.6-tip-nathanl/arch/ppc64/kernel/smp.c 2004-08-03 18:07:20.000000000 -0500 @@ -59,8 +59,6 @@ unsigned long cache_decay_ticks; cpumask_t cpu_possible_map = CPU_MASK_NONE; cpumask_t cpu_online_map = CPU_MASK_NONE; -cpumask_t cpu_available_map = CPU_MASK_NONE; -cpumask_t cpu_present_at_boot = CPU_MASK_NONE; EXPORT_SYMBOL(cpu_online_map); EXPORT_SYMBOL(cpu_possible_map); @@ -124,9 +122,7 @@ static int smp_iSeries_numProcs(void) np = 0; for (i=0; i < NR_CPUS; ++i) { if (paca[i].lppaca.xDynProcStatus < 2) { - cpu_set(i, cpu_available_map); cpu_set(i, cpu_possible_map); - cpu_set(i, cpu_present_at_boot); cpu_set(i, cpu_present_map); ++np; } @@ -878,7 +874,7 @@ int __devinit __cpu_up(unsigned int cpu) int c; /* At boot, don't bother with non-present cpus -JSCHOPP */ - if (system_state == SYSTEM_BOOTING && !cpu_present_at_boot(cpu)) + if (system_state == SYSTEM_BOOTING && !cpu_present(cpu)) return -ENOENT; paca[cpu].prof_counter = 1; diff -puN arch/ppc64/kernel/xics.c~ppc64-remove-unnecessary-cpu-maps arch/ppc64/kernel/xics.c --- 2.6-tip/arch/ppc64/kernel/xics.c~ppc64-remove-unnecessary-cpu-maps 2004-08-03 18:07:20.000000000 -0500 +++ 2.6-tip-nathanl/arch/ppc64/kernel/xics.c 2004-08-03 18:07:20.000000000 -0500 @@ -548,7 +548,7 @@ nextnode: #ifdef CONFIG_SMP for_each_cpu(i) { /* FIXME: Do this dynamically! 
--RR */ - if (!cpu_present_at_boot(i)) + if (!cpu_present(i)) continue; xics_per_cpu[i] = __ioremap((ulong)inodes[get_hard_smp_processor_id(i)].addr, (ulong)inodes[get_hard_smp_processor_id(i)].size, diff -puN include/asm-ppc64/smp.h~ppc64-remove-unnecessary-cpu-maps include/asm-ppc64/smp.h --- 2.6-tip/include/asm-ppc64/smp.h~ppc64-remove-unnecessary-cpu-maps 2004-08-03 18:07:20.000000000 -0500 +++ 2.6-tip-nathanl/include/asm-ppc64/smp.h 2004-08-03 18:07:20.000000000 -0500 @@ -36,23 +36,6 @@ extern void smp_message_recv(int, struct #define smp_processor_id() (get_paca()->paca_index) #define hard_smp_processor_id() (get_paca()->hw_cpu_id) -/* - * Retrieve the state of a CPU: - * online: CPU is in a normal run state - * possible: CPU is a candidate to be made online - * available: CPU is candidate for the 'possible' pool - * Used to get SMT threads started at boot time. - * present_at_boot: CPU was available at boot time. Used in DLPAR - * code to handle special cases for processor start up. - */ -extern cpumask_t cpu_present_at_boot; -extern cpumask_t cpu_online_map; -extern cpumask_t cpu_possible_map; -extern cpumask_t cpu_available_map; - -#define cpu_present_at_boot(cpu) cpu_isset(cpu, cpu_present_at_boot) -#define cpu_available(cpu) cpu_isset(cpu, cpu_available_map) - /* Since OpenPIC has only 4 IPIs, we use slightly different message numbers. * * Make sure this matches openpic_request_IPIs in open_pic.c, or what shows up _ ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From nathanl at austin.ibm.com Wed Aug 4 09:19:42 2004 From: nathanl at austin.ibm.com (Nathan Lynch) Date: Tue, 03 Aug 2004 18:19:42 -0500 Subject: [patch 1/4] Use platform numbering of cpus for hypervisor calls In-Reply-To: <200408032317.i73NHksP073408@austin.ibm.com> References: <200408032317.i73NHksP073408@austin.ibm.com> Message-ID: <1091575182.30125.1.camel@biclops.private.network> I should note that these patches are against 2.6.8-rc2-bk13. 
Nathan ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From jhe at us.ibm.com Thu Aug 5 07:02:43 2004 From: jhe at us.ibm.com (John Engel) Date: Wed, 04 Aug 2004 16:02:43 -0500 Subject: [PATCH] 32-bit ptrace geteventmsg fix Message-ID: <41114EF3.5030607@us.ibm.com> Here's a fix for the 32-bit PTRACE_GETEVENTMSG ptrace call that handles the passing of a 32-bit user address. Please apply, thanks. Signed-off-by: John Engel --- linux-2.6.5-7.97/arch/ppc64/kernel/ptrace32.c.orig 2004-07-29 13:52:42.000000000 -0500 +++ linux-2.6.5-7.97/arch/ppc64/kernel/ptrace32.c 2004-07-29 13:43:02.000000000 -0500 @@ -413,7 +413,9 @@ break; } - + case PTRACE_GETEVENTMSG: + ret = put_user(child->ptrace_message, (unsigned int __user *) data); + break; default: ret = ptrace_request(child, request, addr, data); ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From anton at samba.org Thu Aug 5 07:10:46 2004 From: anton at samba.org (Anton Blanchard) Date: Thu, 5 Aug 2004 07:10:46 +1000 Subject: [PATCH] 32-bit ptrace geteventmsg fix In-Reply-To: <41114EF3.5030607@us.ibm.com> References: <41114EF3.5030607@us.ibm.com> Message-ID: <20040804211046.GU30253@krispykreme> Hi John, Looks good. Anton -- John Engel : Here's a fix for the 32-bit PTRACE_GETEVENTMSG ptrace call that handles the passing of a 32-bit user address. Please apply, thanks. Signed-off-by: John Engel Signed-off-by: Anton Blanchard ===== arch/ppc64/kernel/ptrace32.c 1.11 vs edited ===== --- 1.11/arch/ppc64/kernel/ptrace32.c Thu Jun 10 16:21:41 2004 +++ edited/arch/ppc64/kernel/ptrace32.c Thu Aug 5 07:08:01 2004 @@ -404,7 +404,9 @@ break; } - + case PTRACE_GETEVENTMSG: + ret = put_user(child->ptrace_message, (unsigned int __user *) data); + break; default: ret = ptrace_request(child, request, addr, data); ** Sent via the linuxppc64-dev mail list. 
See http://lists.linuxppc.org/ From jhe at us.ibm.com Thu Aug 5 07:16:19 2004 From: jhe at us.ibm.com (John Engel) Date: Wed, 04 Aug 2004 16:16:19 -0500 Subject: [PATCH] [trivial] clear iSeries refcode field Message-ID: <41115223.6090305@us.ibm.com> This patch clears the refcode field when booting an iSeries partition. Please apply if there are no problems. Thanks. Signed-off-by: John Engel --- arch/ppc64/kernel/iSeries_setup.c.orig 2004-05-17 15:48:53.000000000 -0500 +++ arch/ppc64/kernel/iSeries_setup.c 2004-05-17 16:32:46.000000000 -0500 @@ -852,3 +852,12 @@ } } } + +int __init iSeries_src_init(void) +{ + /* clear the progress line */ + ppc_md.progress(" ", 0xffff); + return 0; +} + +late_initcall(iSeries_src_init); ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From anton at samba.org Thu Aug 5 07:30:54 2004 From: anton at samba.org (Anton Blanchard) Date: Thu, 5 Aug 2004 07:30:54 +1000 Subject: [PATCH] [trivial] clear iSeries refcode field In-Reply-To: <41115223.6090305@us.ibm.com> References: <41115223.6090305@us.ibm.com> Message-ID: <20040804213054.GV30253@krispykreme> Hi John, > This patch clears the refcode field when booting an iSeries partition. > Please apply if there are no problems. Thanks. > > Signed-off-by: John Engel Thanks, but it looks like this one is already in current BK. Anton > --- arch/ppc64/kernel/iSeries_setup.c.orig 2004-05-17 15:48:53.000000000 > -0500 > +++ arch/ppc64/kernel/iSeries_setup.c 2004-05-17 16:32:46.000000000 -0500 > @@ -852,3 +852,12 @@ > } > } > } > + > +int __init iSeries_src_init(void) > +{ > + /* clear the progress line */ > + ppc_md.progress(" ", 0xffff); > + return 0; > +} > + > +late_initcall(iSeries_src_init); ** Sent via the linuxppc64-dev mail list. 
See http://lists.linuxppc.org/ From anton at samba.org Thu Aug 5 08:58:07 2004 From: anton at samba.org (Anton Blanchard) Date: Thu, 5 Aug 2004 08:58:07 +1000 Subject: [PATCH] suppress 'store_purr' unused warning In-Reply-To: <1091521301.27397.3660.camel@nighthawk> References: <1091521301.27397.3660.camel@nighthawk> Message-ID: <20040804225807.GW30253@krispykreme> Hi, > Some new sysfs macros declare a store_purr() function that never gets > used, which makes a compiler warning happen. Suppress the warning with > an unused attribute. Looks good to me, but could you use the __attribute_used__ compiler.h macro to guard against the ever changing gcc mind? Anton > Can you tell that I'm using compiler warnings heavily in my > CONFIG_NONLINEAR debugging? :) > > -- Dave > > > > > --- > > memhotplug-dave/arch/ppc64/kernel/sysfs.c | 4 ++-- > 1 files changed, 2 insertions(+), 2 deletions(-) > > diff -puN arch/ppc64/kernel/sysfs.c~A3-ppc64-store_purr_unused arch/ppc64/kernel/sysfs.c > --- memhotplug/arch/ppc64/kernel/sysfs.c~A3-ppc64-store_purr_unused Tue Aug 3 01:15:51 2004 > +++ memhotplug-dave/arch/ppc64/kernel/sysfs.c Tue Aug 3 01:17:17 2004 > @@ -209,8 +209,8 @@ static ssize_t show_##NAME(struct sys_de > unsigned long val = run_on_cpu(cpu->sysdev.id, read_##NAME, 0); \ > return sprintf(buf, "%lx\n", val); \ > } \ > -static ssize_t store_##NAME(struct sys_device *dev, const char *buf, \ > - size_t count) \ > +static ssize_t __attribute__((unused)) \ > + store_##NAME(struct sys_device *dev, const char *buf, size_t count) \ > { \ > struct cpu *cpu = container_of(dev, struct cpu, sysdev); \ > unsigned long val; \ > > _ ** Sent via the linuxppc64-dev mail list. 
See http://lists.linuxppc.org/ From haveblue at us.ibm.com Thu Aug 5 09:13:40 2004 From: haveblue at us.ibm.com (Dave Hansen) Date: Wed, 04 Aug 2004 16:13:40 -0700 Subject: [PATCH] suppress 'store_purr' unused warning In-Reply-To: <20040804225807.GW30253@krispykreme> References: <1091521301.27397.3660.camel@nighthawk> <20040804225807.GW30253@krispykreme> Message-ID: <1091661220.27397.6860.camel@nighthawk> On Wed, 2004-08-04 at 15:58, Anton Blanchard wrote: > Hi, > > > Some new sysfs macros declare a store_purr() function that never gets > > used, which makes a compiler warning happen. Suppress the warning with > > an unused attribute. > > Looks good to me, but could you use the __attribute_used__ compiler.h > macro to guard against the ever changing gcc mind? Updated patch attached. -- Dave -------------- next part -------------- A non-text attachment was scrubbed... Name: A3-ppc64-store_purr_unused.patch Type: text/x-patch Size: 1050 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20040804/5a5735d5/attachment.bin From anton at samba.org Thu Aug 5 12:56:49 2004 From: anton at samba.org (Anton Blanchard) Date: Thu, 5 Aug 2004 12:56:49 +1000 Subject: [PATCH] suppress 'store_purr' unused warning In-Reply-To: <1091661220.27397.6860.camel@nighthawk> References: <1091521301.27397.3660.camel@nighthawk> <20040804225807.GW30253@krispykreme> <1091661220.27397.6860.camel@nighthawk> Message-ID: <20040805025649.GE30253@krispykreme> > Updated patch attached. Thanks Dave, Anton -- Some new sysfs macros declare a store_purr() function that never gets used, which makes a compiler warning happen. Suppress the warning with the used attribute. 
Signed-off-by: Dave Hansen Signed-off-by: Anton Blanchard diff -puN arch/ppc64/kernel/sysfs.c~A3-ppc64-store_purr_unused arch/ppc64/kernel/sysfs.c --- memhotplug/arch/ppc64/kernel/sysfs.c~A3-ppc64-store_purr_unused Wed Aug 4 16:08:52 2004 +++ memhotplug-dave/arch/ppc64/kernel/sysfs.c Wed Aug 4 16:12:35 2004 @@ -209,8 +209,8 @@ static ssize_t show_##NAME(struct sys_de unsigned long val = run_on_cpu(cpu->sysdev.id, read_##NAME, 0); \ return sprintf(buf, "%lx\n", val); \ } \ -static ssize_t store_##NAME(struct sys_device *dev, const char *buf, \ - size_t count) \ +static ssize_t __attribute_used__ \ + store_##NAME(struct sys_device *dev, const char *buf, size_t count) \ { \ struct cpu *cpu = container_of(dev, struct cpu, sysdev); \ unsigned long val; \ _ ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From anton at samba.org Fri Aug 6 02:01:13 2004 From: anton at samba.org (Anton Blanchard) Date: Fri, 6 Aug 2004 02:01:13 +1000 Subject: [PATCH] remove linux,pci-domain from OFDT In-Reply-To: <1090883181.5914.4.camel@sinatra.austin.ibm.com> References: <1090883181.5914.4.camel@sinatra.austin.ibm.com> Message-ID: <20040805160112.GJ30253@krispykreme> Hi, > The patch below scraps the creation of the "linux,pci-domain" property in the > OF device tree for each PCI Host Bridge. This seems appropriate for the > following reasons: I think Martin wanted it for some userspace tools. However, if it isnt even making it into the device-tree, then I agree its a good candidate for removal :) Anton > 1) It isn't referenced/used in the kernel. > 2) It isn't exported to userspace, since it's added after /proc/device-tree > is created. > 3) Even if it was correctly exported to userspace, the same info is already > available in sysfs. > > Please apply, if there are no problems. 
> > Thanks- > John > > Signed-off-by: John Rose > > diff -Nru a/arch/ppc64/kernel/pSeries_pci.c b/arch/ppc64/kernel/pSeries_pci.c > --- a/arch/ppc64/kernel/pSeries_pci.c Mon Jul 26 17:50:29 2004 > +++ b/arch/ppc64/kernel/pSeries_pci.c Mon Jul 26 17:50:29 2004 > @@ -402,7 +402,6 @@ > int *bus_range; > char *model; > enum phb_types phb_type; > - struct property *of_prop; > > model = (char *)get_property(dev, "model", NULL); > > @@ -448,21 +447,6 @@ > kfree(phb); > return NULL; > } > - > - of_prop = (struct property *)alloc_bootmem(sizeof(struct property) + > - sizeof(phb->global_number)); > - > - if (!of_prop) { > - kfree(phb); > - return NULL; > - } > - > - memset(of_prop, 0, sizeof(struct property)); > - of_prop->name = "linux,pci-domain"; > - of_prop->length = sizeof(phb->global_number); > - of_prop->value = (unsigned char *)&of_prop[1]; > - memcpy(of_prop->value, &phb->global_number, sizeof(phb->global_number)); > - prom_add_property(dev, of_prop); > > phb->first_busno = bus_range[0]; > phb->last_busno = bus_range[1]; ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From johnrose at austin.ibm.com Fri Aug 6 03:21:13 2004 From: johnrose at austin.ibm.com (John Rose) Date: Thu, 05 Aug 2004 12:21:13 -0500 Subject: [PATCH] remove linux,pci-domain from OFDT In-Reply-To: <20040805160112.GJ30253@krispykreme> References: <1090883181.5914.4.camel@sinatra.austin.ibm.com> <20040805160112.GJ30253@krispykreme> Message-ID: <1091726473.27121.11.camel@sinatra.austin.ibm.com> Hey - > I think Martin wanted it for some userspace tools. However, if it isnt > even making it into the device-tree, then I agree its a good candidate > for removal :) Talked to Martin. Apparently it is being exported through /proc, and his tools use it to correlate OF device tree path to pci name "xxxx:xx:xx.x". My mistake, please disregard my request to remove it :) Thanks- John ** Sent via the linuxppc64-dev mail list. 
See http://lists.linuxppc.org/ From nathanl at austin.ibm.com Fri Aug 6 09:14:24 2004 From: nathanl at austin.ibm.com (Nathan Lynch) Date: Thu, 05 Aug 2004 18:14:24 -0500 Subject: cpumask move patch - RFC In-Reply-To: <20040803131527.GA4056@in.ibm.com> References: <20040803131527.GA4056@in.ibm.com> Message-ID: <1091747664.438.139.camel@pants.austin.ibm.com> On Tue, 2004-08-03 at 08:15, R Sharada wrote: > Hello, > This is part of an attempt to clean up some of the kernel data > structure initialisations from prom.c and move to later boot code. +void cpumask_setup() +{ + unsigned long ind; + struct device_node *np = NULL; + int cpuid = 0; + unsigned int *reg = NULL; + char *statusp = NULL; + int *propsize = NULL; The reg, statusp, and propsize initializations seem unnecessary. + cpuid++; + } + of_node_put(np); + return; Most of these of_node_put's are superfluous unless there's a chance you have explicitly broken out of the loop. + propsize = (int *)get_property(np, "ibm,ppc-interrupt-server#s", NULL); + if (*propsize < 0) { + /* no property. old hardware has no SMT */ + cpu_threads = 1; + } else { + /* We have a threaded processor */ + cpu_threads = *propsize / sizeof(u32); + if (cpu_threads > 2) + cpu_threads = 1; /* ToDo: panic? */ This is incorrect -- get_property does not return the size of the property; it stores the size in the third argument. The return value of get_property is a pointer to the kernel's copy of the property itself. While I agree in theory with removing all the cpumask initializations from prom_hold_cpus, I don't think simply transplanting the mess is the way to do it. Wouldn't it be nice to have one loop which works on pmac and pSeries, SMP and UP, without all those #ifdef's? Nathan ** Sent via the linuxppc64-dev mail list. 
See http://lists.linuxppc.org/ From paulus at samba.org Fri Aug 6 11:54:05 2004 From: paulus at samba.org (Paul Mackerras) Date: Fri, 6 Aug 2004 11:54:05 +1000 Subject: cpumask move patch - RFC In-Reply-To: <1091747664.438.139.camel@pants.austin.ibm.com> References: <20040803131527.GA4056@in.ibm.com> <1091747664.438.139.camel@pants.austin.ibm.com> Message-ID: <16658.58557.455887.141510@cargo.ozlabs.ibm.com> Nathan Lynch writes: > This is incorrect -- get_property does not return the size of the > property; it stores the size in the third argument. The return value of > get_property is a pointer to the kernel's copy of the property itself. Good point. I notice also that we need to check for statusp != NULL in this code: + statusp = (char *)get_property(np, "status", NULL); + if (strcmp(statusp, "okay") != 0) + continue; > While I agree in theory with removing all the cpumask initializations > from prom_hold_cpus, I don't think simply transplanting the mess is the > way to do it. Wouldn't it be nice to have one loop which works on pmac > and pSeries, SMP and UP, without all those #ifdef's? It would be nice, if it turns out to be possible. :) It's not clear that there is enough consistency between the device tree representations on the different platforms to be able to achieve that. I think what Sharada has done is a useful first step. At least now we can get rid of a bunch of RELOCs. Sharada, could you do a revised patch? Thanks, Paul. ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From sharada at in.ibm.com Fri Aug 6 14:26:27 2004 From: sharada at in.ibm.com (R Sharada) Date: Fri, 6 Aug 2004 09:56:27 +0530 Subject: cpumask move patch - RFC In-Reply-To: <16658.58557.455887.141510@cargo.ozlabs.ibm.com> References: <20040803131527.GA4056@in.ibm.com> <1091747664.438.139.camel@pants.austin.ibm.com> <16658.58557.455887.141510@cargo.ozlabs.ibm.com> Message-ID: <20040806042627.GA1288@in.ibm.com> Nathan, Paul, Thanks for your valuable feedback. 
I noted your comments and shall work on a revised patch and put it up for review soon. Thanks and Regards, Sharada On Fri, Aug 06, 2004 at 11:54:05AM +1000, Paul Mackerras wrote: > Nathan Lynch writes: > > > This is incorrect -- get_property does not return the size of the > > property; it stores the size in the third argument. The return value of > > get_property is a pointer to the kernel's copy of the property itself. > > Good point. > > I notice also that we need to check for statusp != NULL in this code: > > + statusp = (char *)get_property(np, "status", NULL); > + if (strcmp(statusp, "okay") != 0) > + continue; > > > > While I agree in theory with removing all the cpumask initializations > > from prom_hold_cpus, I don't think simply transplanting the mess is the > > way to do it. Wouldn't it be nice to have one loop which works on pmac > > and pSeries, SMP and UP, without all those #ifdef's? > > It would be nice, if it turns out to be possible. :) It's not clear > that there is enough consistency between the device tree > representations on the different platforms to be able to achieve that. > > I think what Sharada has done is a useful first step. At least now we > can get rid of a bunch of RELOCs. Sharada, could you do a revised > patch? > > Thanks, > Paul. ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From sharada at in.ibm.com Mon Aug 9 16:30:16 2004 From: sharada at in.ibm.com (R Sharada) Date: Mon, 9 Aug 2004 12:00:16 +0530 Subject: cpumask move patch - RFC In-Reply-To: <1091747664.438.139.camel@pants.austin.ibm.com> References: <20040803131527.GA4056@in.ibm.com> <1091747664.438.139.camel@pants.austin.ibm.com> Message-ID: <20040809063016.GA2256@in.ibm.com> Hello, On Thu, Aug 05, 2004 at 06:14:24PM -0500, Nathan Lynch wrote: > > On Tue, 2004-08-03 at 08:15, R Sharada wrote: > > Hello, > > This is part of an attempt to clean up some of the kernel data > > structure initialisations from prom.c and move to later boot code. 
> > +void cpumask_setup() > +{ > + unsigned long ind; > + struct device_node *np = NULL; > + int cpuid = 0; > + unsigned int *reg = NULL; > + char *statusp = NULL; > + int *propsize = NULL; > > The reg, statusp, and propsize initializations seem unnecessary. Don't we need to initialize the pointers for cleanliness' sake? That was the only idea behind setting them to NULL. > > + cpuid++; > + } > + of_node_put(np); > + return; > > Most of these of_node_put's are superfluous unless there's a chance you > have explicitly broken out of the loop. One question here though. The of_node_put() calls in the patch are actually out of the while loop. I see that of_find_node_by_type() actually increments the node->users via the of_node_get() call, and decrements for the parent node; hence for the last node, we would still need to decrement the refcount, by calling of_node_put explicitly outside of the while loop, is it not? Or did I miss something? > > + propsize = (int *)get_property(np, "ibm,ppc-interrupt-server#s", NULL); > + if (*propsize < 0) { > + /* no property. old hardware has no SMT */ > + cpu_threads = 1; > + } else { > + /* We have a threaded processor */ > + cpu_threads = *propsize / sizeof(u32); > + if (cpu_threads > 2) > + cpu_threads = 1; /* ToDo: panic? */ > > This is incorrect -- get_property does not return the size of the > property; it stores the size in the third argument. The return value of > get_property is a pointer to the kernel's copy of the property itself. Thanks and yes, that was my mistake. I will change this to read the property size correctly from the get_property() call. > > While I agree in theory with removing all the cpumask initializations > from prom_hold_cpus, I don't think simply transplanting the mess is the > way to do it. Wouldn't it be nice to have one loop which works on pmac > and pSeries, SMP and UP, without all those #ifdef's? > I agree that what you suggest is the cleaner way to go instead of a plain copy.
Well, I could remove the #ifdef SMP in the code. However, as regards merging pmac and pseries, the code for pmac does not seem to really check for the cpu status, etc. Is it not needed on pmac? I am not too aware of pmac and need to see the devicetree for pmac and understand if it is different from the pseries tree. > Nathan > > I am working on the changes and will post a revised patch soon. I might need to look a little more to merge the pseries and pmac stuff together. Thanks and Regards, Sharada ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From haveblue at us.ibm.com Wed Aug 11 09:42:48 2004 From: haveblue at us.ibm.com (Dave Hansen) Date: Tue, 10 Aug 2004 16:42:48 -0700 Subject: [PATCH] include profile.c in kernel/irq.c Message-ID: <1092181368.2813.26.camel@nighthawk> This is against 2.6.8-rc4-mm1 arch/ppc64/kernel/irq.c: In function `init_irq_proc': arch/ppc64/kernel/irq.c:797: warning: implicit declaration of function `create_prof_cpu_mask' -- Dave -------------- next part -------------- A non-text attachment was scrubbed... Name: ppc64-irq.c-include-profile.h.patch Type: text/x-patch Size: 376 bytes Desc: not available Url : http://ozlabs.org/pipermail/linuxppc64-dev/attachments/20040810/ce18bccc/attachment.bin From johnrose at austin.ibm.com Thu Aug 12 02:01:10 2004 From: johnrose at austin.ibm.com (John Rose) Date: Wed, 11 Aug 2004 11:01:10 -0500 Subject: [PATCH] promote hose_list to an "official" list Message-ID: <1092240070.3940.6.camel@sinatra.austin.ibm.com> This patch changes hose_list from a simple linked list to a "list.h"-style list. This is in preparation for the runtime addition/removal of PCI Host Bridges. 
Thanks- John Signed-off-by: John Rose diff -Nru a/arch/ppc64/kernel/pSeries_iommu.c b/arch/ppc64/kernel/pSeries_iommu.c --- a/arch/ppc64/kernel/pSeries_iommu.c Tue Aug 10 18:14:50 2004 +++ b/arch/ppc64/kernel/pSeries_iommu.c Tue Aug 10 18:14:50 2004 @@ -90,7 +90,7 @@ static void iommu_buses_init(void) { - struct pci_controller* phb; + struct pci_controller *phb, *tmp; struct device_node *dn, *first_dn; int num_slots, num_slots_ilog2; int first_phb = 1; @@ -109,7 +109,7 @@ /* XXX Should we be using pci_root_buses instead? -ojn */ - for (phb=hose_head; phb; phb=phb->next) { + list_for_each_entry_safe(phb, tmp, &hose_list, list_node) { first_dn = ((struct device_node *)phb->arch_data)->child; /* Carve 2GB into the largest dma_window_size possible */ diff -Nru a/arch/ppc64/kernel/pSeries_pci.c b/arch/ppc64/kernel/pSeries_pci.c --- a/arch/ppc64/kernel/pSeries_pci.c Tue Aug 10 18:14:50 2004 +++ b/arch/ppc64/kernel/pSeries_pci.c Tue Aug 10 18:14:50 2004 @@ -712,9 +712,9 @@ static void phbs_fixup_io(void) { - struct pci_controller *hose; + struct pci_controller *hose, *tmp; - for (hose=hose_head;hose;hose=hose->next) + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) remap_bus_range(hose->bus); } @@ -747,8 +747,8 @@ pci_find_hose_for_OF_device(struct device_node *node) { while (node) { - struct pci_controller *hose; - for (hose=hose_head;hose;hose=hose->next) + struct pci_controller *hose, *tmp; + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) if (hose->arch_data == node) return hose; node=node->parent; diff -Nru a/arch/ppc64/kernel/pci.c b/arch/ppc64/kernel/pci.c --- a/arch/ppc64/kernel/pci.c Tue Aug 10 18:14:50 2004 +++ b/arch/ppc64/kernel/pci.c Tue Aug 10 18:14:50 2004 @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -63,8 +64,7 @@ void iSeries_pcibios_init(void); -struct pci_controller *hose_head; -struct pci_controller **hose_tail = &hose_head; +LIST_HEAD(hose_list); struct pci_dma_ops pci_dma_ops; 
EXPORT_SYMBOL(pci_dma_ops); @@ -240,8 +240,8 @@ hose->type = controller_type; hose->global_number = global_phb_number++; - *hose_tail = hose; - hose_tail = &hose->next; + list_add_tail(&hose->list_node, &hose_list); + return hose; } @@ -281,7 +281,7 @@ static int __init pcibios_init(void) { - struct pci_controller *hose; + struct pci_controller *hose, *tmp; struct pci_bus *bus; #ifdef CONFIG_PPC_ISERIES @@ -292,7 +292,7 @@ printk("PCI: Probing PCI hardware\n"); /* Scan all of the recorded PCI controllers. */ - for (hose = hose_head; hose; hose = hose->next) { + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { hose->last_busno = 0xff; bus = pci_scan_bus(hose->first_busno, hose->ops, hose->arch_data); diff -Nru a/arch/ppc64/kernel/pci.h b/arch/ppc64/kernel/pci.h --- a/arch/ppc64/kernel/pci.h Tue Aug 10 18:14:50 2004 +++ b/arch/ppc64/kernel/pci.h Tue Aug 10 18:14:50 2004 @@ -17,8 +17,6 @@ extern struct pci_controller* pci_alloc_pci_controller(enum phb_types controller_type); extern struct pci_controller* pci_find_hose_for_OF_device(struct device_node* node); -extern struct pci_controller* hose_head; -extern struct pci_controller** hose_tail; extern int global_phb_number; diff -Nru a/arch/ppc64/kernel/pci_dn.c b/arch/ppc64/kernel/pci_dn.c --- a/arch/ppc64/kernel/pci_dn.c Tue Aug 10 18:14:50 2004 +++ b/arch/ppc64/kernel/pci_dn.c Tue Aug 10 18:14:50 2004 @@ -129,10 +129,10 @@ */ static void *traverse_all_pci_devices(traverse_func pre) { - struct pci_controller *phb; + struct pci_controller *phb, *tmp; void *ret; - for (phb = hose_head; phb; phb = phb->next) + list_for_each_entry_safe(phb, tmp, &hose_list, list_node) if ((ret = traverse_pci_devices(phb->arch_data, pre, phb)) != NULL) return ret; diff -Nru a/arch/ppc64/kernel/pmac_pci.c b/arch/ppc64/kernel/pmac_pci.c --- a/arch/ppc64/kernel/pmac_pci.c Tue Aug 10 18:14:50 2004 +++ b/arch/ppc64/kernel/pmac_pci.c Tue Aug 10 18:14:50 2004 @@ -672,9 +672,9 @@ static void __init pmac_fixup_phb_resources(void) { - 
struct pci_controller *hose; + struct pci_controller *hose, *tmp; - for (hose = hose_head; hose; hose = hose->next) { + list_for_each_entry_safe(phb, tmp, &hose_list, list_node) { unsigned long offset = (unsigned long)hose->io_base_virt - pci_io_base; hose->io_resource.start += offset; hose->io_resource.end += offset; diff -Nru a/include/asm-ppc64/pci-bridge.h b/include/asm-ppc64/pci-bridge.h --- a/include/asm-ppc64/pci-bridge.h Tue Aug 10 18:14:50 2004 +++ b/include/asm-ppc64/pci-bridge.h Tue Aug 10 18:14:50 2004 @@ -33,9 +33,9 @@ struct pci_controller { char what[8]; /* Eye catcher */ enum phb_types type; /* Type of hardware */ - struct pci_controller *next; struct pci_bus *bus; void *arch_data; + struct list_head list_node; int first_busno; int last_busno; diff -Nru a/include/asm-ppc64/pci.h b/include/asm-ppc64/pci.h --- a/include/asm-ppc64/pci.h Tue Aug 10 18:14:50 2004 +++ b/include/asm-ppc64/pci.h Tue Aug 10 18:14:50 2004 @@ -233,6 +233,8 @@ extern void pcibios_add_platform_entries(struct pci_dev *dev); +extern struct list_head hose_list; + #endif /* __KERNEL__ */ #endif /* __PPC64_PCI_H */ ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From nathanl at austin.ibm.com Thu Aug 12 02:07:10 2004 From: nathanl at austin.ibm.com (nathanl at austin.ibm.com) Date: Wed, 11 Aug 2004 11:07:10 -0500 Subject: [patch 1/2] Avoid calling scheduler from timer_interrupt on "offline" cpu Message-ID: <200408111607.i7BG7CSY039962@austin.ibm.com> When taking a cpu offline, once the cpu has been removed from cpu_online_map, it is not supposed to service any more interrupts. This presents a problem on ppc64 because we cannot truly disable the decrementer. There used to be cpu_is_offline() checks in several scheduler functions (e.g. rebalance_tick()) which papered over this issue, but these checks were removed recently. 
So with recent 2.6 kernels, an attempt to offline a cpu can result in a crash in find_busiest_group(): Turning cpu 2 to 0 cpu 0x2: Vector: 300 (Data Access) at [c00000003a4033e0] pc: c00000000004b988: .find_busiest_group+0x234/0x420 lr: c00000000004b8bc: .find_busiest_group+0x168/0x420 sp: c00000003a403660 msr: 8000000000001032 dar: 18 dsisr: 40000000 current = 0xc000000031fdf420 paca = 0xc000000000421200 pid = 8515, comm = kstopmachine enter ? for help 2:mon> t [c00000003a403660] c00000003a403720 (unreliable) [c00000003a403780] c00000000004bcf4 .load_balance+0x78/0x2c0 [c00000003a403840] c00000000004c3e4 .rebalance_tick+0x124/0x148 [c00000003a4038f0] c000000000060170 .update_process_times+0x44/0x60 [c00000003a403980] c00000000003ab64 .smp_local_timer_interrupt+0x40/0x50 [c00000003a4039f0] c000000000015eb4 .timer_interrupt+0x100/0x40c [c00000003a403ae0] c00000000000a2b4 Decrementer_common+0xb4/0x100 Exception: 901 (Decrementer) at c00000000007b008 .restart_machine+0x20/0x30 [c00000003a403dd0] 0000000000000000 (unreliable) [c00000003a403e50] c00000000007b0dc .do_stop+0xc4/0xc8 [c00000003a403ed0] c000000000070cc8 .kthread+0x11c/0x128 [c00000003a403f90] c0000000000194dc .kernel_thread+0x4c/0x68 This patch prevents such crashes. 
Signed-off-by: Nathan Lynch --- diff -puN arch/ppc64/kernel/time.c~ppc64-timer_interrupt-handle-offline-cpu arch/ppc64/kernel/time.c --- 2.6.8-rc4/arch/ppc64/kernel/time.c~ppc64-timer_interrupt-handle-offline-cpu 2004-08-11 10:44:27.000000000 -0500 +++ 2.6.8-rc4-nathanl/arch/ppc64/kernel/time.c 2004-08-11 10:44:27.000000000 -0500 @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -281,8 +282,20 @@ int timer_interrupt(struct pt_regs * reg while (lpaca->next_jiffy_update_tb <= (cur_tb = get_tb())) { #ifdef CONFIG_SMP - smp_local_timer_interrupt(regs); + /* + * We cannot disable the decrementer, so in the period + * between this cpu's being marked offline in cpu_online_map + * and calling stop-self, it is taking timer interrupts. + * Avoid calling into the scheduler rebalancing code if this + * is the case. + */ + if (!cpu_is_offline(cpu)) + smp_local_timer_interrupt(regs); #endif + /* + * No need to check whether cpu is offline here; boot_cpuid + * should have been fixed up by now. + */ if (cpu == boot_cpuid) { write_seqlock(&xtime_lock); tb_last_stamp = lpaca->next_jiffy_update_tb; _ ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From nathanl at austin.ibm.com Thu Aug 12 02:07:17 2004 From: nathanl at austin.ibm.com (nathanl at austin.ibm.com) Date: Wed, 11 Aug 2004 11:07:17 -0500 Subject: [patch 2/2] fix idle loop for offline cpu Message-ID: <200408111607.i7BG7JSY044136@austin.ibm.com> In the default_idle and dedicated_idle loops, there are some inner loops out of which we should break if the cpu is marked offline. Otherwise, it is possible for the cpu to get stuck and never actually go offline. shared_idle is unaffected. 
Signed-off-by: Nathan Lynch --- diff -puN arch/ppc64/kernel/idle.c~ppc64-fix-idle-loop-for-offline-cpu arch/ppc64/kernel/idle.c --- 2.6.8-rc4/arch/ppc64/kernel/idle.c~ppc64-fix-idle-loop-for-offline-cpu 2004-08-11 10:44:29.000000000 -0500 +++ 2.6.8-rc4-nathanl/arch/ppc64/kernel/idle.c 2004-08-11 10:44:29.000000000 -0500 @@ -132,6 +132,7 @@ int iSeries_idle(void) int default_idle(void) { long oldval; + unsigned int cpu = smp_processor_id(); while (1) { oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); @@ -139,7 +140,7 @@ int default_idle(void) if (!oldval) { set_thread_flag(TIF_POLLING_NRFLAG); - while (!need_resched()) { + while (!need_resched() && !cpu_is_offline(cpu)) { barrier(); HMT_low(); } @@ -151,8 +152,7 @@ int default_idle(void) } schedule(); - if (cpu_is_offline(smp_processor_id()) && - system_state == SYSTEM_RUNNING) + if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) cpu_die(); } @@ -169,8 +169,9 @@ int dedicated_idle(void) struct paca_struct *lpaca = get_paca(), *ppaca; unsigned long start_snooze; unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay); + unsigned int cpu = smp_processor_id(); - ppaca = &paca[smp_processor_id() ^ 1]; + ppaca = &paca[cpu ^ 1]; while (1) { /* Indicate to the HV that we are idle. Now would be @@ -182,7 +183,7 @@ int dedicated_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); start_snooze = __get_tb() + *smt_snooze_delay * tb_ticks_per_usec; - while (!need_resched()) { + while (!need_resched() && !cpu_is_offline(cpu)) { /* need_resched could be 1 or 0 at this * point. If it is 0, set it to 0, so * an IPI/Prod is sent. If it is 1, keep @@ -241,8 +242,7 @@ int dedicated_idle(void) HMT_medium(); lpaca->lppaca.xIdle = 0; schedule(); - if (cpu_is_offline(smp_processor_id()) && - system_state == SYSTEM_RUNNING) + if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) cpu_die(); } return 0; _ ** Sent via the linuxppc64-dev mail list. 
See http://lists.linuxppc.org/ From johnrose at austin.ibm.com Thu Aug 12 02:22:20 2004 From: johnrose at austin.ibm.com (John Rose) Date: Wed, 11 Aug 2004 11:22:20 -0500 Subject: [PATCH] [correction] promote hose_list to an "official" list Message-ID: <1092241340.3940.12.camel@sinatra.austin.ibm.com> On second thought, no need to externalize the list to include/asm-ppc64/pci.h. Here's a corrected patch. This patch changes hose_list from a simple linked list to a "list.h"-style list. This is in preparation for the runtime addition/removal of PCI Host Bridges. Thanks- John diff -Nru a/arch/ppc64/kernel/pSeries_iommu.c b/arch/ppc64/kernel/pSeries_iommu.c --- a/arch/ppc64/kernel/pSeries_iommu.c Wed Aug 11 10:54:25 2004 +++ b/arch/ppc64/kernel/pSeries_iommu.c Wed Aug 11 10:54:25 2004 @@ -90,7 +90,7 @@ static void iommu_buses_init(void) { - struct pci_controller* phb; + struct pci_controller *phb, *tmp; struct device_node *dn, *first_dn; int num_slots, num_slots_ilog2; int first_phb = 1; @@ -109,7 +109,7 @@ /* XXX Should we be using pci_root_buses instead? 
-ojn */ - for (phb=hose_head; phb; phb=phb->next) { + list_for_each_entry_safe(phb, tmp, &hose_list, list_node) { first_dn = ((struct device_node *)phb->arch_data)->child; /* Carve 2GB into the largest dma_window_size possible */ diff -Nru a/arch/ppc64/kernel/pSeries_pci.c b/arch/ppc64/kernel/pSeries_pci.c --- a/arch/ppc64/kernel/pSeries_pci.c Wed Aug 11 10:54:25 2004 +++ b/arch/ppc64/kernel/pSeries_pci.c Wed Aug 11 10:54:25 2004 @@ -712,9 +712,9 @@ static void phbs_fixup_io(void) { - struct pci_controller *hose; + struct pci_controller *hose, *tmp; - for (hose=hose_head;hose;hose=hose->next) + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) remap_bus_range(hose->bus); } @@ -747,8 +747,8 @@ pci_find_hose_for_OF_device(struct device_node *node) { while (node) { - struct pci_controller *hose; - for (hose=hose_head;hose;hose=hose->next) + struct pci_controller *hose, *tmp; + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) if (hose->arch_data == node) return hose; node=node->parent; diff -Nru a/arch/ppc64/kernel/pci.c b/arch/ppc64/kernel/pci.c --- a/arch/ppc64/kernel/pci.c Wed Aug 11 10:54:25 2004 +++ b/arch/ppc64/kernel/pci.c Wed Aug 11 10:54:25 2004 @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -63,8 +64,7 @@ void iSeries_pcibios_init(void); -struct pci_controller *hose_head; -struct pci_controller **hose_tail = &hose_head; +LIST_HEAD(hose_list); struct pci_dma_ops pci_dma_ops; EXPORT_SYMBOL(pci_dma_ops); @@ -240,8 +240,8 @@ hose->type = controller_type; hose->global_number = global_phb_number++; - *hose_tail = hose; - hose_tail = &hose->next; + list_add_tail(&hose->list_node, &hose_list); + return hose; } @@ -281,7 +281,7 @@ static int __init pcibios_init(void) { - struct pci_controller *hose; + struct pci_controller *hose, *tmp; struct pci_bus *bus; #ifdef CONFIG_PPC_ISERIES @@ -292,7 +292,7 @@ printk("PCI: Probing PCI hardware\n"); /* Scan all of the recorded PCI controllers. 
*/ - for (hose = hose_head; hose; hose = hose->next) { + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { hose->last_busno = 0xff; bus = pci_scan_bus(hose->first_busno, hose->ops, hose->arch_data); diff -Nru a/arch/ppc64/kernel/pci.h b/arch/ppc64/kernel/pci.h --- a/arch/ppc64/kernel/pci.h Wed Aug 11 10:54:25 2004 +++ b/arch/ppc64/kernel/pci.h Wed Aug 11 10:54:25 2004 @@ -17,9 +17,7 @@ extern struct pci_controller* pci_alloc_pci_controller(enum phb_types controller_type); extern struct pci_controller* pci_find_hose_for_OF_device(struct device_node* node); -extern struct pci_controller* hose_head; -extern struct pci_controller** hose_tail; - +extern struct list_head hose_list; extern int global_phb_number; /******************************************************************* diff -Nru a/arch/ppc64/kernel/pci_dn.c b/arch/ppc64/kernel/pci_dn.c --- a/arch/ppc64/kernel/pci_dn.c Wed Aug 11 10:54:25 2004 +++ b/arch/ppc64/kernel/pci_dn.c Wed Aug 11 10:54:25 2004 @@ -129,10 +129,10 @@ */ static void *traverse_all_pci_devices(traverse_func pre) { - struct pci_controller *phb; + struct pci_controller *phb, *tmp; void *ret; - for (phb = hose_head; phb; phb = phb->next) + list_for_each_entry_safe(phb, tmp, &hose_list, list_node) if ((ret = traverse_pci_devices(phb->arch_data, pre, phb)) != NULL) return ret; diff -Nru a/arch/ppc64/kernel/pmac_pci.c b/arch/ppc64/kernel/pmac_pci.c --- a/arch/ppc64/kernel/pmac_pci.c Wed Aug 11 10:54:25 2004 +++ b/arch/ppc64/kernel/pmac_pci.c Wed Aug 11 10:54:25 2004 @@ -672,9 +672,9 @@ static void __init pmac_fixup_phb_resources(void) { - struct pci_controller *hose; + struct pci_controller *hose, *tmp; - for (hose = hose_head; hose; hose = hose->next) { + list_for_each_entry_safe(phb, tmp, &hose_list, list_node) { unsigned long offset = (unsigned long)hose->io_base_virt - pci_io_base; hose->io_resource.start += offset; hose->io_resource.end += offset; diff -Nru a/include/asm-ppc64/pci-bridge.h b/include/asm-ppc64/pci-bridge.h --- 
a/include/asm-ppc64/pci-bridge.h Wed Aug 11 10:54:25 2004 +++ b/include/asm-ppc64/pci-bridge.h Wed Aug 11 10:54:25 2004 @@ -33,9 +33,9 @@ struct pci_controller { char what[8]; /* Eye catcher */ enum phb_types type; /* Type of hardware */ - struct pci_controller *next; struct pci_bus *bus; void *arch_data; + struct list_head list_node; int first_busno; int last_busno; ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From jschopp at austin.ibm.com Thu Aug 12 04:30:59 2004 From: jschopp at austin.ibm.com (Joel Schopp) Date: Wed, 11 Aug 2004 13:30:59 -0500 Subject: [patch 2/2] fix idle loop for offline cpu In-Reply-To: <200408111607.i7BG7JSY044136@austin.ibm.com> References: <200408111607.i7BG7JSY044136@austin.ibm.com> Message-ID: <411A65E3.8040101@austin.ibm.com> iSeries is not cpu DLPAR capable in Linux. The "i5" machine itself is, but it is not supported by #define CONFIG_PPC_ISERIES. You actually run the pSeries Linux on it. Because of this I think it is wasteful to check cpu_is_offline here. Other than that I think both patches are great. 
> diff -puN arch/ppc64/kernel/idle.c~ppc64-fix-idle-loop-for-offline-cpu arch/ppc64/kernel/idle.c > --- 2.6.8-rc4/arch/ppc64/kernel/idle.c~ppc64-fix-idle-loop-for-offline-cpu 2004-08-11 10:44:29.000000000 -0500 > +++ 2.6.8-rc4-nathanl/arch/ppc64/kernel/idle.c 2004-08-11 10:44:29.000000000 -0500 > @@ -132,6 +132,7 @@ int iSeries_idle(void) > int default_idle(void) > { > long oldval; > + unsigned int cpu = smp_processor_id(); > > while (1) { > oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); > @@ -139,7 +140,7 @@ int default_idle(void) > if (!oldval) { > set_thread_flag(TIF_POLLING_NRFLAG); > > - while (!need_resched()) { > + while (!need_resched() && !cpu_is_offline(cpu)) { > barrier(); > HMT_low(); > } > @@ -151,8 +152,7 @@ int default_idle(void) > } > > schedule(); > - if (cpu_is_offline(smp_processor_id()) && > - system_state == SYSTEM_RUNNING) > + if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) > cpu_die(); > } > ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From nathanl at austin.ibm.com Thu Aug 12 04:45:54 2004 From: nathanl at austin.ibm.com (Nathan Lynch) Date: Wed, 11 Aug 2004 13:45:54 -0500 Subject: [patch 2/2] fix idle loop for offline cpu In-Reply-To: <411A65E3.8040101@austin.ibm.com> References: <200408111607.i7BG7JSY044136@austin.ibm.com> <411A65E3.8040101@austin.ibm.com> Message-ID: <1092249954.19239.2.camel@pants.austin.ibm.com> On Wed, 2004-08-11 at 13:30, Joel Schopp wrote: > iSeries is not cpu DLPAR capable in Linux. The "i5" machine itself is, > but it is not supported by #define CONFIG_PPC_ISERIES. You actually run > the pSeries Linux on it. Because of this I think it is wasteful to > check cpu_is_offline here. > > Other than that I think both patches are great. 
> > > diff -puN arch/ppc64/kernel/idle.c~ppc64-fix-idle-loop-for-offline-cpu arch/ppc64/kernel/idle.c > > --- 2.6.8-rc4/arch/ppc64/kernel/idle.c~ppc64-fix-idle-loop-for-offline-cpu 2004-08-11 10:44:29.000000000 -0500 > > +++ 2.6.8-rc4-nathanl/arch/ppc64/kernel/idle.c 2004-08-11 10:44:29.000000000 -0500 > > @@ -132,6 +132,7 @@ int iSeries_idle(void) > > int default_idle(void) > > { > > long oldval; > > + unsigned int cpu = smp_processor_id(); > > The diff output is confusing -- I did not modify iSeries_idle; the hunk to which you refer actually changes default_idle, which is used on Power 4 partitions. Nathan ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From jschopp at austin.ibm.com Thu Aug 12 05:20:37 2004 From: jschopp at austin.ibm.com (Joel Schopp) Date: Wed, 11 Aug 2004 14:20:37 -0500 Subject: [patch 2/2] fix idle loop for offline cpu In-Reply-To: <1092249954.19239.2.camel@pants.austin.ibm.com> References: <200408111607.i7BG7JSY044136@austin.ibm.com> <411A65E3.8040101@austin.ibm.com> <1092249954.19239.2.camel@pants.austin.ibm.com> Message-ID: <411A7185.5070002@austin.ibm.com> > The diff output is confusing -- I did not modify iSeries_idle; the hunk > to which you refer actually changes default_idle, which is used on Power > 4 partitions. You are right. Diff lied to me, it was default_idle, which is fine to modify. Patches look great. ** Sent via the linuxppc64-dev mail list. 
See http://lists.linuxppc.org/ From sharada at in.ibm.com Thu Aug 12 22:17:03 2004 From: sharada at in.ibm.com (R Sharada) Date: Thu, 12 Aug 2004 17:47:03 +0530 Subject: cpumask move patch revised - RFC In-Reply-To: <20040806042627.GA1288@in.ibm.com> References: <20040803131527.GA4056@in.ibm.com> <1091747664.438.139.camel@pants.austin.ibm.com> <16658.58557.455887.141510@cargo.ozlabs.ibm.com> <20040806042627.GA1288@in.ibm.com> Message-ID: <20040812121703.GA9760@in.ibm.com> Hello, Based on the feedback, here is the revised cpumask patch that moves the cpumask initialization from prom_hold_cpus() to later boot, in setup_system(). The patch is against the 2.6.8-rc2 linus bitkeeper tree. - The get_property call has been corrected to obtain the property size from the correct argument. - The unnecessary variable initializations have been removed - check for NULL value of status incorporated. I have not removed the #ifdefs for SMP, as all the cpumask data structures, as I see them in code now, are defined for SMP systems and does not seem to be defined for UP. The merge of the POWERMAC and PSERIES #ifdefs is also deferred as I don't know a lot about the POWERMAC initialization and startup to see if the two cases can be merged. Please review and comment on the patch. 
Thanks and Regards, Sharada -------------- next part -------------- diff -Naur linux-2.6.8-rc2-org/arch/ppc64/kernel/chrp_setup.c linux-2.6.8-rc2-chg/arch/ppc64/kernel/chrp_setup.c --- linux-2.6.8-rc2-org/arch/ppc64/kernel/chrp_setup.c 2004-08-03 02:12:58.000000000 -0700 +++ linux-2.6.8-rc2-chg/arch/ppc64/kernel/chrp_setup.c 2004-08-13 06:02:22.808964544 -0700 @@ -77,6 +77,8 @@ void pSeries_calibrate_decr(void); void fwnmi_init(void); extern void SystemReset_FWNMI(void), MachineCheck_FWNMI(void); /* from head.S */ +void cpumask_setup(void); + int fwnmi_active; /* TRUE if an FWNMI handler is present */ dev_t boot_dev; @@ -468,3 +470,92 @@ setup_default_decr(); } + +void cpumask_setup() +{ + unsigned long ind; + struct device_node *np = NULL; + int cpuid = 0; + unsigned int *reg; + char *statusp; + int prop; + int *propsize = &prop; + unsigned int cpu_threads; + + printk(KERN_INFO "cpumask_setup\n"); + /* On pmac, we just fill out the various global bitmasks and + * arrays indicating our CPUs are here, they are actually started + * later on from pmac_smp + */ + if (systemcfg->platform == PLATFORM_POWERMAC) { + while ((np = of_find_node_by_type(np, "cpu"))) { + reg = (unsigned int *)get_property(np, "reg", NULL); +#ifdef CONFIG_SMP + cpu_set(cpuid, cpu_available_map); + cpu_set(cpuid, cpu_possible_map); + cpu_set(cpuid, cpu_present_at_boot); + if (*reg == 0) + cpu_set(cpuid, cpu_online_map); +#endif /* CONFIG_SMP */ + cpuid++; + } + of_node_put(np); + return; + } + + while ((np = of_find_node_by_type(np, "cpu"))) { + + statusp = (char *)get_property(np, "status", NULL); + if ((statusp == NULL) || (statusp && strcmp(statusp, "okay") != 0)) + continue; + + reg = (unsigned int *)get_property(np, "reg", NULL); + + get_property(np, "ibm,ppc-interrupt-server#s", propsize); + if (*propsize < 0) { + /* no property.
old hardware has no SMT */ + cpu_threads = 1; + } else { + /* We have a threaded processor */ + cpu_threads = *propsize / sizeof(u32); + if (cpu_threads > 2) + cpu_threads = 1; /* ToDo: panic? */ + } + +#ifdef CONFIG_SMP + cpu_set(cpuid, cpu_available_map); + cpu_set(cpuid, cpu_possible_map); + cpu_set(cpuid, cpu_present_at_boot); + if (cpuid == boot_cpuid) + cpu_set(cpuid, cpu_online_map); + + /* set the secondary threads into the cpuid mask */ + for (ind=1; ind < cpu_threads; ind++) { + cpuid++; + if (cpuid >= NR_CPUS) + continue; + if (naca->smt_state) { + cpu_set(cpuid, cpu_available_map); + cpu_set(cpuid, cpu_present_at_boot); + } + } +#endif /* CONFIG_SMP */ + cpuid++; + } + of_node_put(np); + +#ifdef CONFIG_HMT + /* Only enable HMT on processors that provide support. */ + if (__is_processor(PV_PULSAR) || + __is_processor(PV_ICESTAR) || + __is_processor(PV_SSTAR)) { + + for (ind = 0; ind < NR_CPUS; ind += 2) { + if (!cpu_online(ind)) + continue; + cpu_set(ind+1, cpu_possible_map); + } + } +#endif + return; +} diff -Naur linux-2.6.8-rc2-org/arch/ppc64/kernel/prom.c linux-2.6.8-rc2-chg/arch/ppc64/kernel/prom.c --- linux-2.6.8-rc2-org/arch/ppc64/kernel/prom.c 2004-08-04 06:10:30.000000000 -0700 +++ linux-2.6.8-rc2-chg/arch/ppc64/kernel/prom.c 2004-08-12 23:52:47.000000000 -0700 @@ -939,13 +939,6 @@ prom_getprop(node, "reg", &reg, sizeof(reg)); lpaca[cpuid].hw_cpu_id = reg; -#ifdef CONFIG_SMP - cpu_set(cpuid, RELOC(cpu_available_map)); - cpu_set(cpuid, RELOC(cpu_possible_map)); - cpu_set(cpuid, RELOC(cpu_present_at_boot)); - if (reg == 0) - cpu_set(cpuid, RELOC(cpu_online_map)); -#endif /* CONFIG_SMP */ cpuid++; } return; @@ -1042,9 +1035,6 @@ #ifdef CONFIG_SMP /* Set the number of active processors. */ _systemcfg->processorCount++; - cpu_set(cpuid, RELOC(cpu_available_map)); - cpu_set(cpuid, RELOC(cpu_possible_map)); - cpu_set(cpuid, RELOC(cpu_present_at_boot)); #endif } else { prom_printf("...
failed: %x\n", *acknowledge); @@ -1053,10 +1043,6 @@ #ifdef CONFIG_SMP else { prom_printf("%x : booting cpu %s\n", cpuid, path); - cpu_set(cpuid, RELOC(cpu_available_map)); - cpu_set(cpuid, RELOC(cpu_possible_map)); - cpu_set(cpuid, RELOC(cpu_online_map)); - cpu_set(cpuid, RELOC(cpu_present_at_boot)); } #endif next: @@ -1069,13 +1055,6 @@ lpaca[cpuid].hw_cpu_id = interrupt_server[i]; prom_printf("%x : preparing thread ... ", interrupt_server[i]); - if (_naca->smt_state) { - cpu_set(cpuid, RELOC(cpu_available_map)); - cpu_set(cpuid, RELOC(cpu_present_at_boot)); - prom_printf("available\n"); - } else { - prom_printf("not available\n"); - } } #endif cpuid++; @@ -1101,8 +1080,6 @@ pir & 0x3ff; } } -/* cpu_set(i+1, cpu_online_map); */ - cpu_set(i+1, RELOC(cpu_possible_map)); } _systemcfg->processorCount *= 2; } else { diff -Naur linux-2.6.8-rc2-org/arch/ppc64/kernel/setup.c linux-2.6.8-rc2-chg/arch/ppc64/kernel/setup.c --- linux-2.6.8-rc2-org/arch/ppc64/kernel/setup.c 2004-08-03 02:12:59.000000000 -0700 +++ linux-2.6.8-rc2-chg/arch/ppc64/kernel/setup.c 2004-08-04 06:15:27.000000000 -0700 @@ -76,6 +76,7 @@ extern void pseries_secondary_smp_init(unsigned long); extern int idle_setup(void); extern void vpa_init(int cpu); +extern void cpumask_setup(void); unsigned long decr_overclock = 1; unsigned long decr_overclock_proc0 = 1; @@ -229,6 +230,7 @@ register_console(&udbg_console); __irq_offset_value = NUM_ISA_INTERRUPTS; finish_device_tree(); + cpumask_setup(); chrp_init(r3, r4, r5, r6, r7); #ifdef CONFIG_SMP @@ -251,6 +253,7 @@ #ifdef CONFIG_PPC_PMAC if (systemcfg->platform == PLATFORM_POWERMAC) { finish_device_tree(); + cpumask_setup(); pmac_init(r3, r4, r5, r6, r7); } #endif /* CONFIG_PPC_PMAC */ From olh at suse.de Fri Aug 13 01:02:29 2004 From: olh at suse.de (Olaf Hering) Date: Thu, 12 Aug 2004 17:02:29 +0200 Subject: max openfirmware property size Message-ID: <20040812150229.GC28577@suse.de> Does the openfirmware spec say something about the maximum size of the 'name' 
and 'compatible' property? I guess the content length is undefined. -- USB is for mice, FireWire is for men! sUse lINUX ag, nÜRNBERG ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From segher at kernel.crashing.org Fri Aug 13 01:52:50 2004 From: segher at kernel.crashing.org (Segher Boessenkool) Date: Thu, 12 Aug 2004 17:52:50 +0200 Subject: max openfirmware property size In-Reply-To: <20040812150229.GC28577@suse.de> References: <20040812150229.GC28577@suse.de> Message-ID: > Does the openfirmware spec say something about the maximum size of the > 'name' and 'compatible' property? > I guess the content length is undefined. It says nothing specific to "name" or "compatible". So their maximum size is the maximum size that fits into a cell, i.e. 0xffffffff on most systems. Segher ** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/ From jschopp at austin.ibm.com Fri Aug 13 05:05:03 2004 From: jschopp at austin.ibm.com (Joel Schopp) Date: Thu, 12 Aug 2004 14:05:03 -0500 Subject: cpumask move patch revised - RFC In-Reply-To: <20040812121703.GA9760@in.ibm.com> References: <20040803131527.GA4056@in.ibm.com> <1091747664.438.139.camel@pants.austin.ibm.com> <16658.58557.455887.141510@cargo.ozlabs.ibm.com> <20040806042627.GA1288@in.ibm.com> <20040812121703.GA9760@in.ibm.com> Message-ID: <411BBF5F.3070901@austin.ibm.com> This will surely conflict with Nathan's recent patch "[patch 4/4] Remove unnecessary cpu maps (available, present_at_boot)". I think Nathan's patch should go in first and yours reworked to match. Other comments inline below. R Sharada wrote: > Hello, > Based on the feedback, here is the revised cpumask patch that > moves the cpumask initialization from prom_hold_cpus() to later boot, in > setup_system(). > The patch is against the 2.6.8-rc2 linus bitkeeper tree. > > - The get_property call has been corrected to obtain the property size from > the correct argument.
> - The unnecessary variable initializations have been removed > - check for NULL value of status incorporated. > > I have not removed the #ifdefs for SMP, as all the cpumask data structures, > as I see them in code now, are defined for SMP systems and does not seem to be > defined for UP. > The merge of the POWERMAC and PSERIES #ifdefs is also deferred as I don't > know a lot about the POWERMAC initialization and startup to see if the two > cases can be merged. > > Please review and comment on the patch. > > Thanks and Regards, > Sharada > > > ------------------------------------------------------------------------ > > diff -Naur linux-2.6.8-rc2-org/arch/ppc64/kernel/chrp_setup.c linux-2.6.8-rc2-chg/arch/ppc64/kernel/chrp_setup.c > --- linux-2.6.8-rc2-org/arch/ppc64/kernel/chrp_setup.c 2004-08-03 02:12:58.000000000 -0700 > +++ linux-2.6.8-rc2-chg/arch/ppc64/kernel/chrp_setup.c 2004-08-13 06:02:22.808964544 -0700 > @@ -77,6 +77,8 @@ > void pSeries_calibrate_decr(void); > void fwnmi_init(void); > extern void SystemReset_FWNMI(void), MachineCheck_FWNMI(void); /* from head.S */ > +void cpumask_setup(void); > + Is this really necessary? Might it go better in a .h file somewhere? 
> int fwnmi_active; /* TRUE if an FWNMI handler is present */ > > dev_t boot_dev; > @@ -468,3 +470,92 @@ > > setup_default_decr(); > } > + > +void cpumask_setup() > +{ > + unsigned long ind; > + struct device_node *np = NULL; > + int cpuid = 0; > + unsigned int *reg; > + char *statusp; > + int prop; > + int *propsize = ∝ > + unsigned int cpu_threads; > + > + printk(KERN_INFO "cpumask_setup\n"); > + /* On pmac, we just fill out the various global bitmasks and > + * arrays indicating our CPUs are here, they are actually started > + * later on from pmac_smp > + */ > + if (systemcfg->platform == PLATFORM_POWERMAC) { > + while ((np = of_find_node_by_type(np, "cpu"))) { > + reg = (unsigned int *)get_property(np, "reg", NULL); > +#ifdef CONFIG_SMP > + cpu_set(cpuid, cpu_available_map); > + cpu_set(cpuid, cpu_possible_map); > + cpu_set(cpuid, cpu_present_at_boot); > + if (*reg == 0) > + cpu_set(cpuid, cpu_online_map); > +#endif /* CONFIG_SMP */ > + cpuid++; > + } Shouldn't the whole while loop and of_node_put be in the #ifdef CONFIG_SMP, as otherwise all we do is iterate over the cpus not doing anything? > + of_node_put(np); > + return; > + } > + > + while ((np = of_find_node_by_type(np, "cpu"))) { > + > + statusp = (char *)get_property(n