Index: linux-2.6.15/arch/powerpc/kernel/asm-offsets.c =================================================================== --- linux-2.6.15.orig/arch/powerpc/kernel/asm-offsets.c 2006-02-14 14:35:37.402525224 -0800 +++ linux-2.6.15/arch/powerpc/kernel/asm-offsets.c 2006-02-14 14:40:39.429408608 -0800 @@ -142,6 +142,10 @@ DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1)); DEFINE(LPPACAANYINT, offsetof(struct lppaca, int_dword.any_int)); DEFINE(LPPACADECRINT, offsetof(struct lppaca, int_dword.fields.decr_int)); + DEFINE(PACA_STARTB, offsetof(struct paca_struct, start_tb)); + DEFINE(PACA_CDFLAG, offsetof(struct paca_struct, cdflag)); + DEFINE(PACA_DELTATB, offsetof(struct paca_struct, delta_tb)); + #endif /* CONFIG_PPC64 */ /* RTAS */ Index: linux-2.6.15/arch/powerpc/kernel/entry_64.S =================================================================== --- linux-2.6.15.orig/arch/powerpc/kernel/entry_64.S 2006-02-14 14:35:37.419522640 -0800 +++ linux-2.6.15/arch/powerpc/kernel/entry_64.S 2006-02-14 14:40:39.430408456 -0800 @@ -537,7 +537,19 @@ * r13 is our per cpu area, only restore it if we are returning to * userspace */ + beq 1f +BEGIN_FTR_SECTION + li r10,0 + stb r10,PACA_CDFLAG(r13) /* clear cdflag */ + mfspr r10,SPRN_PURR /* PURR now */ + ld r11,PACA_STARTB(r13) /* PURR stored in paca start_tb */ + sub r10,r10,r11 /* PURR ticks since start_tb */ + ld r11,PACA_DELTATB(r13) + add r10,r11,r10 + std r10,PACA_DELTATB(r13) /* accumulate into delta_tb */ +END_FTR_SECTION_IFSET(CPU_FTR_PURR) + REST_GPR(13, r1) 1: ld r3,_CTR(r1) Index: linux-2.6.15/arch/powerpc/kernel/head_64.S =================================================================== --- linux-2.6.15.orig/arch/powerpc/kernel/head_64.S 2006-02-14 14:35:37.427521424 -0800 +++ linux-2.6.15/arch/powerpc/kernel/head_64.S 2006-02-14 14:40:39.432408152 -0800 @@ -276,6 +276,10 @@ mr r10,r1; /* Save r1 */ \ subi r1,r1,INT_FRAME_SIZE; /* alloc frame on kernel stack */ \ beq- 1f; \ +BEGIN_FTR_SECTION; \ + mfspr r1,SPRN_PURR; /* Collect cpu_util (PURR) */ \ + std r1,PACA_STARTB(r13); /* Store PURR in paca */ \ +END_FTR_SECTION_IFSET(CPU_FTR_PURR); \ ld
r1,PACAKSAVE(r13); /* kernel stack to use */ \ 1: cmpdi cr1,r1,0; /* check if r1 is in userspace */ \ bge- cr1,bad_stack; /* abort if it is */ \ Index: linux-2.6.15/arch/powerpc/kernel/lparcfg.c =================================================================== --- linux-2.6.15.orig/arch/powerpc/kernel/lparcfg.c 2006-02-14 14:35:37.438519752 -0800 +++ linux-2.6.15/arch/powerpc/kernel/lparcfg.c 2006-02-14 14:40:39.433408000 -0800 @@ -37,7 +37,7 @@ #include #include -#define MODULE_VERS "1.6" +#define MODULE_VERS "1.7" #define MODULE_NAME "lparcfg" /* #define LPARCFG_DEBUG */ @@ -224,7 +224,7 @@ for_each_cpu(cpu) { cu = &per_cpu(cpu_usage_array, cpu); - sum_purr += cu->current_tb; + sum_purr += cu->current_cpu_util; } return sum_purr; } Index: linux-2.6.15/arch/powerpc/kernel/process.c =================================================================== --- linux-2.6.15.orig/arch/powerpc/kernel/process.c 2006-02-14 14:35:37.463515952 -0800 +++ linux-2.6.15/arch/powerpc/kernel/process.c 2006-02-15 14:50:47.683472424 -0800 @@ -315,19 +315,39 @@ new_thread = &new->thread; old_thread = &current->thread; #ifdef CONFIG_PPC64 - /* - * Collect processor utilization data per process - */ - if (firmware_has_feature(FW_FEATURE_SPLPAR)) { - struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); - long unsigned start_tb, current_tb; - start_tb = old_thread->start_tb; - cu->current_tb = current_tb = mfspr(SPRN_PURR); - old_thread->accum_tb += (current_tb - start_tb); - new_thread->start_tb = current_tb; + /* + * Collect PURR utilization data per process and per processor. + * Stays under CONFIG_PPC64: struct cpu_usage, the paca and the + * thread_struct accounting fields are only declared for 64-bit. + */ + if (cpu_has_feature(CPU_FTR_PURR)) { + struct cpu_usage *pd = &__get_cpu_var(cpu_usage_array); + struct paca_struct *lpaca = get_paca(); + long unsigned start_cpu_util, current_cpu_util; + + pd->current_cpu_util = current_cpu_util =
mfspr(SPRN_PURR); + + if (old_thread->start_cpu_util == 0) + old_thread->start_cpu_util = current_cpu_util; + + /* store delta_tb & mftb into cpu_util data array for + * later easy access otherwise you have to do run_on_cpu + * which is expensive + */ + pd->collected_krntb = lpaca->delta_tb; + pd->collected_timebase = mftb(); + + start_cpu_util = old_thread->start_cpu_util; + old_thread->total_dp += (current_cpu_util - start_cpu_util); + + /* collect time from entry into kernel to now and account it + * in process kernel time + */ + old_thread->proc_stime += (current_cpu_util - lpaca->start_tb); + new_thread->start_cpu_util = current_cpu_util; } #endif local_irq_save(flags); last = _switch(old_thread, new_thread); Index: linux-2.6.15/arch/powerpc/kernel/setup_64.c =================================================================== --- linux-2.6.15.orig/arch/powerpc/kernel/setup_64.c 2006-02-14 14:35:37.486512456 -0800 +++ linux-2.6.15/arch/powerpc/kernel/setup_64.c 2006-02-15 20:38:07.725523136 -0800 @@ -683,3 +683,156 @@ } } #endif + + +/* To initialize and collect statistics for user and kernel times + * We do the following, most of them are there for boundary conditions only. 
+*/ + +/* Partition-wide totals accumulated from processors that were taken offline */ +u64 offline_cpu_total_tb; +u64 offline_cpu_total_cpu_util; +u64 offline_cpu_total_krncycles; +u64 offline_cpu_total_idle; + +/* Pin current to @cpu and record its starting PURR and timebase in cpu_usage */ +static void collect_startcpu_util(int cpu) +{ + cpumask_t cpumask; + struct cpu_usage *cus = &per_cpu(cpu_usage_array, cpu); + + /* Save the task's affinity mask so it can be restored below */ + cpumask = current->cpus_allowed; + + /* Move to the cpu that we want to collect starting cpu_util from */ + set_cpus_allowed(current, cpumask_of_cpu(cpu)); + BUG_ON(smp_processor_id() != cpu); /* the migration above must have put us on @cpu */ + + /* Sample PURR and timebase while running on the target cpu */ + cus->start_cpu_util = mfspr(SPRN_PURR); + cus->start_timebase = mftb(); + + /* Restore the original affinity mask */ + set_cpus_allowed(current, cpumask); +} + +/* Collect the cpu timebase and cpu_util deltas in case the cpu is being + * taken down. Only post these values to the global table once the + * CPU_DEAD notifier is invoked. These values won't be available later, + * so they have to be collected now. +*/ + +#ifdef CONFIG_HOTPLUG_CPU +static void collect_cpu_deltas(int cpu) +{ + cpumask_t cpumask; + struct cpu_usage *pd = &per_cpu(cpu_usage_array, cpu); + struct paca_struct *lpaca; + + /* Save the task's affinity mask so it can be restored below */ + cpumask = current->cpus_allowed; + + /* Move to the cpu that we want to collect the deltas from */ + set_cpus_allowed(current, cpumask_of_cpu(cpu)); + BUG_ON(smp_processor_id() != cpu); /* the migration above must have put us on @cpu */ + + /* get the paca of the cpu we are now running on */ + lpaca = get_paca(); + + /* Snapshot the paca's accumulated delta_tb and the current timebase */ + pd->collected_krntb = lpaca->delta_tb; + pd->collected_timebase = mftb(); + + /* Restore the original affinity mask */ + set_cpus_allowed(current, cpumask); +} + +/* Post the values collected by collect_cpu_deltas() function above + * only when CPU_DEAD notifier is invoked. If the values are posted + * earlier to the global variables then they would need to be deducted + * in case the cpu fails to go down. 
+ * The reporting tool will add all online cpu values followed by the + * global partition-wide values to arrive at the correct values +*/ + +static void post_cpu_deltas(int cpu) +{ + struct cpu_usage *pd = &per_cpu(cpu_usage_array, cpu); + u64 total_timebase, total_cpu_util; + total_timebase = (pd->collected_timebase - pd->start_timebase); + total_cpu_util = pd->current_cpu_util - pd->start_cpu_util; /* NOTE(review): current_cpu_util is not sampled by collect_cpu_deltas(); confirm the last stored value is recent enough */ + offline_cpu_total_tb += total_timebase; + offline_cpu_total_cpu_util += total_cpu_util; + offline_cpu_total_krncycles += pd->collected_krntb; + offline_cpu_total_idle += (total_timebase - total_cpu_util); /* idle = timebase ticks not covered by cpu_util */ +} +#endif + +/* This function does the following. + * A. If the cpu comes online, it collects the starting cpu_util and the + * starting timebase for that cpu. + * B. If the cpu is preparing to go down, then it harvests the information + * it needs from the cpu and stores it in a percpu array. + * C. Only when the cpu actually is taken offline does it post the + * values to the global variables. These variables collect only the info + * from cpus taken offline and store that value for later computation. 
+*/ + +static int __devinit cpustatus_callback(struct notifier_block *nfb, + unsigned long action, + void *hcpu) +{ + long cpu = (long)hcpu; + + switch (action) { + case CPU_ONLINE: + /* Record the starting cpu_util and timebase of the + * CPU that just came up */ + collect_startcpu_util(cpu); + break; + +#ifdef CONFIG_HOTPLUG_CPU + case CPU_DOWN_PREPARE: + /* Snapshot this cpu's deltas into its per-cpu slot; + * they are posted to the globals at CPU_DEAD */ + + collect_cpu_deltas(cpu); + break; + + case CPU_DEAD: + post_cpu_deltas(cpu); /* fold the snapshot into the offline totals */ + break; +#endif + + } + return NOTIFY_OK; +} + +static struct notifier_block cpustatus_notifier = { &cpustatus_callback, NULL, 0 }; + +static int __init cpu_util_init(void) +{ + int cpu; + + if (cpu_has_feature(CPU_FTR_PURR)) { + + /* Collect starting cpu values for all online cpus */ + + for_each_online_cpu(cpu){ + collect_startcpu_util(cpu); + } + + /* Initialize the global offline totals to zero */ + offline_cpu_total_tb = 0; + offline_cpu_total_cpu_util = 0; + offline_cpu_total_krncycles = 0; + offline_cpu_total_idle = 0; + + /* Track cpus coming online and going offline from now on */ + register_cpu_notifier(&cpustatus_notifier); + } + + return 0; +} + +__initcall(cpu_util_init); Index: linux-2.6.15/arch/powerpc/kernel/sysfs.c =================================================================== --- linux-2.6.15.orig/arch/powerpc/kernel/sysfs.c 2006-01-02 19:21:10.000000000 -0800 +++ linux-2.6.15/arch/powerpc/kernel/sysfs.c 2006-02-14 14:40:39.438530392 -0800 @@ -20,6 +20,8 @@ #include #include #include +#include +#include "setup.h" static DEFINE_PER_CPU(struct cpu, cpu_devices); @@ -171,6 +173,12 @@ return count; \ } +static ssize_t show_dispatchedcycles(struct sys_device *, char *); +static ssize_t show_offline_cpu_cycles(struct sys_device *, char *); + +static SYSDEV_ATTR(offline_cpu_cycles, 0444, show_offline_cpu_cycles, NULL); +static SYSDEV_ATTR(cpu_dispatched_cycles, 0444, show_dispatchedcycles, NULL); + SYSFS_PMCSETUP(mmcr0, SPRN_MMCR0); 
SYSFS_PMCSETUP(mmcr1, SPRN_MMCR1); SYSFS_PMCSETUP(mmcra, SPRN_MMCRA); @@ -232,8 +240,11 @@ if (cur_cpu_spec->num_pmcs >= 8) sysdev_create_file(s, &attr_pmc8); - if (cpu_has_feature(CPU_FTR_SMT)) + if (cpu_has_feature(CPU_FTR_PURR)) { sysdev_create_file(s, &attr_purr); + sysdev_create_file(s, &attr_offline_cpu_cycles); + sysdev_create_file(s, &attr_cpu_dispatched_cycles); + } } #ifdef CONFIG_HOTPLUG_CPU @@ -274,8 +285,11 @@ if (cur_cpu_spec->num_pmcs >= 8) sysdev_remove_file(s, &attr_pmc8); - if (cpu_has_feature(CPU_FTR_SMT)) + if (cpu_has_feature(CPU_FTR_PURR)) { sysdev_remove_file(s, &attr_purr); + sysdev_remove_file(s, &attr_offline_cpu_cycles); + sysdev_remove_file(s, &attr_cpu_dispatched_cycles); + } } #endif /* CONFIG_HOTPLUG_CPU */ @@ -336,8 +350,48 @@ return sprintf(buf, "%d\n", get_hard_smp_processor_id(cpu->sysdev.id)); } + static SYSDEV_ATTR(physical_id, 0444, show_physical_id, NULL); +/* Display the cpu cycle totals accumulated from processors taken offline. + * Since it is possible to take processors offline, ignoring the stats + * of those processors would make the partition-wide value incorrect. 
+*/ +static ssize_t show_offline_cpu_cycles(struct sys_device *dev, char *buf) +{ + return sprintf(buf, "%lu %lu %lu %lu\n", offline_cpu_total_tb, + offline_cpu_total_cpu_util, offline_cpu_total_krncycles, + offline_cpu_total_idle); /* NOTE(review): %lu assumes u64 is unsigned long here - confirm */ +} + +static ssize_t show_dispatchedcycles( struct sys_device *dev, char *buf) +{ + u64 total_cpu_util, total_tb, idle_cycles, kernel_cycles, user_cycles, iowait = 0; + + struct cpu *cpu = container_of(dev, struct cpu, sysdev); + struct cpu_usage *cpu_stats = &per_cpu(cpu_usage_array, cpu->sysdev.id); + + /* Timebase ticks between the start sample and the last collection */ + total_tb = cpu_stats->collected_timebase - cpu_stats->start_timebase; + + /* cpu_util ticks consumed since the start sample */ + total_cpu_util = cpu_stats->current_cpu_util - cpu_stats->start_cpu_util ; + + /* Kernel cycles as last stored in this cpu's cpu_usage slot */ + kernel_cycles = cpu_stats->collected_krntb; + + /* User cycles: cpu_util ticks not accounted to the kernel */ + user_cycles = total_cpu_util - kernel_cycles ; + + /* Idle cycles: timebase ticks not covered by cpu_util */ + idle_cycles = total_tb - total_cpu_util ; + + /* iowait is not collected yet and is always reported as 0 */ + return sprintf(buf,"%lu %lu %lu %lu %lu %lu\n", total_tb, + total_cpu_util, kernel_cycles, user_cycles, + idle_cycles, iowait); +} + static int __init topology_init(void) { int cpu; Index: linux-2.6.15/arch/powerpc/kernel/time.c =================================================================== --- linux-2.6.15.orig/arch/powerpc/kernel/time.c 2006-02-14 14:35:37.498510632 -0800 +++ linux-2.6.15/arch/powerpc/kernel/time.c 2006-02-14 14:40:39.440530088 -0800 @@ -481,9 +481,9 @@ #ifdef CONFIG_PPC64 /* collect purr register values often, for accurate calculations */ - if (firmware_has_feature(FW_FEATURE_SPLPAR)) { + if (cpu_has_feature(CPU_FTR_PURR)) { struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); - cu->current_tb = mfspr(SPRN_PURR); + cu->current_cpu_util = mfspr(SPRN_PURR); } #endif Index: linux-2.6.15/include/asm-powerpc/cputable.h =================================================================== --- linux-2.6.15.orig/include/asm-powerpc/cputable.h 
2006-02-14 14:35:43.901468984 -0800 +++ linux-2.6.15/include/asm-powerpc/cputable.h 2006-02-15 10:18:20.768473224 -0800 @@ -117,6 +117,7 @@ #define CPU_FTR_MMCRA_SIHV ASM_CONST(0x0000080000000000) #define CPU_FTR_CI_LARGE_PAGE ASM_CONST(0x0000100000000000) #define CPU_FTR_PAUSE_ZERO ASM_CONST(0x0000200000000000) +#define CPU_FTR_PURR ASM_CONST(0x0000400000000000) #else /* ensure on 32b processors the flags are available for compiling but * don't do anything */ @@ -132,6 +133,7 @@ #define CPU_FTR_LOCKLESS_TLBIE ASM_CONST(0x0) #define CPU_FTR_MMCRA_SIHV ASM_CONST(0x0) #define CPU_FTR_CI_LARGE_PAGE ASM_CONST(0x0) +#define CPU_FTR_PURR ASM_CONST(0x0) #endif #ifndef __ASSEMBLY__ @@ -313,7 +315,7 @@ CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_MMCRA | CPU_FTR_SMT | CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | - CPU_FTR_MMCRA_SIHV, + CPU_FTR_MMCRA_SIHV | CPU_FTR_PURR, CPU_FTRS_CELL = CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | Index: linux-2.6.15/include/asm-powerpc/processor.h =================================================================== --- linux-2.6.15.orig/include/asm-powerpc/processor.h 2006-02-14 14:35:43.965459256 -0800 +++ linux-2.6.15/include/asm-powerpc/processor.h 2006-02-15 20:21:21.584539872 -0800 @@ -174,8 +174,9 @@ } fpscr; int fpexc_mode; /* floating-point exception mode */ #ifdef CONFIG_PPC64 - unsigned long start_tb; /* Start purr when proc switched in */ - unsigned long accum_tb; /* Total accumilated purr for process */ + unsigned long start_cpu_util; /* cpu_util when proc was switched in */ + unsigned long total_dp ; /* Total cpu_util delta accumulated for proc */ + unsigned long proc_stime; /* Was pad, now cpu_util ticks of kernel time */ #endif unsigned long vdso_base; /* base of the vDSO library */ unsigned long dabr; /* Data address breakpoint register */ Index: linux-2.6.15/include/asm-powerpc/time.h 
=================================================================== --- linux-2.6.15.orig/include/asm-powerpc/time.h 2006-02-14 14:35:43.985456216 -0800 +++ linux-2.6.15/include/asm-powerpc/time.h 2006-02-14 14:40:39.444529480 -0800 @@ -73,6 +73,22 @@ u64 result_low; }; +#ifdef CONFIG_PPC64 +/* Per-cpu store for PURR (cpu_util) and timebase samples */ +struct cpu_usage { + /* Starting values, used as the baseline for later deltas */ + u64 start_timebase; /* Holds the starting time base */ + u64 start_cpu_util; /* Holds the starting cpu_util for that processor */ + + /* kept here for easy access instead of requiring a change of cpu */ + u64 collected_timebase; /* timebase when collected_krntb was stored */ + u64 collected_krntb; /* Kernel delta cpu_util value */ + u64 current_cpu_util; /* Most recently stored cpu_util register value */ +}; + +DECLARE_PER_CPU(struct cpu_usage, cpu_usage_array); +#endif /* CONFIG_PPC64 */ + /* Accessor functions for the timebase (RTC on 601) registers. */ /* If one day CONFIG_POWER is added just define __USE_RTC as 1 */ #ifdef CONFIG_6xx @@ -213,13 +229,6 @@ extern void div128_by_32(u64 dividend_high, u64 dividend_low, unsigned divisor, struct div_result *dr); -/* Used to store Processor Utilization register (purr) values */ - -struct cpu_usage { - u64 current_tb; /* Holds the current purr register values */ -}; - -DECLARE_PER_CPU(struct cpu_usage, cpu_usage_array); #endif /* __KERNEL__ */ #endif /* __PPC64_TIME_H */ Index: linux-2.6.15/arch/powerpc/kernel/setup.h =================================================================== --- linux-2.6.15.orig/arch/powerpc/kernel/setup.h 2006-01-02 19:21:10.000000000 -0800 +++ linux-2.6.15/arch/powerpc/kernel/setup.h 2006-02-15 10:26:43.707551048 -0800 @@ -3,4 +3,10 @@ void check_for_initrd(void); + /* Defined in setup_64.c */ +extern u64 offline_cpu_total_tb; +extern u64 offline_cpu_total_cpu_util; +extern u64 offline_cpu_total_krncycles; +extern u64 offline_cpu_total_idle; + #endif /* 
_POWERPC_KERNEL_SETUP_H */ Index: linux-2.6.15/include/asm-powerpc/paca.h =================================================================== --- linux-2.6.15.orig/include/asm-powerpc/paca.h 2006-02-14 14:35:43.946462144 -0800 +++ linux-2.6.15/include/asm-powerpc/paca.h 2006-02-15 10:25:31.477483712 -0800 @@ -96,6 +96,10 @@ u64 saved_r1; /* r1 save for RTAS calls */ u64 saved_msr; /* MSR saved here by enter_rtas */ u8 proc_enabled; /* irq soft-enable flag */ + u64 start_tb; /* PURR value stored by head_64.S on kernel entry */ + u8 cdflag; /* meant to be 1 after a U to K transition; only its clearing is visible in this patch - TODO confirm setter */ + u64 delta_tb; /* Accumulated PURR delta of all kernel transitions */ + }; extern struct paca_struct paca[];