---
 include/linux/percpu.h |   11 ++++++-----
 mm/allocpercpu.c       |    8 +++++++-
 2 files changed, 13 insertions(+), 6 deletions(-)

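This threads the caller's __FILE__/__LINE__ through the existing macro
chain down to __percpu_alloc_mask().  As an illustrative expansion (struct
foo is only a placeholder type for the example), a call such as

	struct foo *p = alloc_percpu(struct foo);

effectively becomes

	struct foo *p = (struct foo *)__percpu_alloc_mask(sizeof(struct foo),
							  GFP_KERNEL,
							  &cpu_possible_map,
							  __FILE__, __LINE__);

with __FILE__ and __LINE__ resolving at the caller's use site, so each
allocation can be attributed to its origin.
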
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -81,7 +81,8 @@ struct percpu_data {
         (__typeof__(ptr))__p->ptrs[(cpu)];	          \
 })
 
-extern void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask);
+extern void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask,
+				 const char *file, unsigned int line);
 extern void percpu_free(void *__pdata);
 
 #else /* CONFIG_SMP */
@@ -100,15 +101,15 @@ static inline void percpu_free(void *__p
 
 #endif /* CONFIG_SMP */
 
-#define percpu_alloc_mask(size, gfp, mask) \
-	__percpu_alloc_mask((size), (gfp), &(mask))
+#define percpu_alloc_mask(size, gfp, mask, file, line)	\
+	__percpu_alloc_mask((size), (gfp), &(mask), file, line)
 
-#define percpu_alloc(size, gfp) percpu_alloc_mask((size), (gfp), cpu_online_map)
+#define percpu_alloc(size, gfp) percpu_alloc_mask((size), (gfp), cpu_online_map, __FILE__, __LINE__)
 
 /* (legacy) interface for use without CPU hotplug handling */
 
 #define __alloc_percpu(size)	percpu_alloc_mask((size), GFP_KERNEL, \
-						  cpu_possible_map)
+						  cpu_possible_map, __FILE__, __LINE__)
 #define alloc_percpu(type)	(type *)__alloc_percpu(sizeof(type))
 #define free_percpu(ptr)	percpu_free((ptr))
 #define per_cpu_ptr(ptr, cpu)	percpu_ptr((ptr), (cpu))
diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c
--- a/mm/allocpercpu.c
+++ b/mm/allocpercpu.c
@@ -98,6 +98,7 @@ static int __percpu_populate_mask(void *
 #define percpu_populate_mask(__pdata, size, gfp, mask) \
 	__percpu_populate_mask((__pdata), (size), (gfp), &(mask))
 
+static atomic_t total = ATOMIC_INIT(0);
 /**
  * percpu_alloc_mask - initial setup of per-cpu data
  * @size: size of per-cpu object
@@ -108,7 +109,8 @@ static int __percpu_populate_mask(void *
  * which is simplified by the percpu_alloc() wrapper.
  * Per-cpu objects are populated with zeroed buffers.
  */
-void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask)
+void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask,
+			  const char *file, unsigned int line)
 {
 	/*
 	 * We allocate whole cache lines to avoid false sharing
@@ -116,6 +118,10 @@ void *__percpu_alloc_mask(size_t size, g
 	size_t sz = roundup(nr_cpu_ids * sizeof(void *), cache_line_size());
 	void *pdata = kzalloc(sz, gfp);
 	void *__pdata = __percpu_disguise(pdata);
+
+	atomic_add(size, &total);
+	printk("%s:%u wants +%zu (total %d)\n", file, line, size,
+	       atomic_read(&total));
 
 	if (unlikely(!pdata))
 		return NULL;

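With this applied, each per-cpu allocation request is reported in the
kernel log as

	<file>:<line> wants +<size> (total <running total>)

where the running total is the sum of all per-cpu allocation requests seen
so far; nothing in this patch decrements it on percpu_free(), and the line
is printed even when the kzalloc() above fails.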