cpumask: use percpu allocations instead of arrays in kernel/trace/ring_buffer.c

Instead of allocating an nr_cpu_ids-sized array, most places should be using
alloc_percpu().  This doesn't waste space when cpu numbers aren't
contiguous.
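
For reference, the idiom relied on below looks roughly like this (an
illustrative sketch only; "struct foo" is a stand-in, not code from this
file):

	struct foo **ptrs;	/* one stored pointer per possible cpu */
	int cpu;

	ptrs = alloc_percpu(struct foo *);	/* no nr_cpu_ids-sized array */
	if (!ptrs)
		return -ENOMEM;

	for_each_possible_cpu(cpu)
		*per_cpu_ptr(ptrs, cpu) = kzalloc(sizeof(struct foo), GFP_KERNEL);

	/* ... use *per_cpu_ptr(ptrs, cpu) wherever buffers[cpu] was used ... */

	for_each_possible_cpu(cpu)
		kfree(*per_cpu_ptr(ptrs, cpu));
	free_percpu(ptrs);	/* frees the per-cpu slots themselves */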

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 kernel/trace/ring_buffer.c |   76 ++++++++++++++++++++++-----------------------
 1 file changed, 38 insertions(+), 38 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -272,13 +272,12 @@ struct ring_buffer {
 struct ring_buffer {
 	unsigned			pages;
 	unsigned			flags;
-	int				cpus;
 	cpumask_var_t			cpumask;
 	atomic_t			record_disabled;
 
 	struct mutex			mutex;
 
-	struct ring_buffer_per_cpu	**buffers;
+	struct ring_buffer_per_cpu	**buffers_pcpu;
 };
 
 struct ring_buffer_iter {
@@ -450,7 +449,6 @@ struct ring_buffer *ring_buffer_alloc(un
 struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
 {
 	struct ring_buffer *buffer;
-	int bsize;
 	int cpu;
 
 	/* Paranoid! Optimizes out when all is well */
@@ -475,18 +473,14 @@ struct ring_buffer *ring_buffer_alloc(un
 		buffer->pages++;
 
 	cpumask_copy(buffer->cpumask, cpu_possible_mask);
-	buffer->cpus = nr_cpu_ids;
-
-	bsize = sizeof(void *) * nr_cpu_ids;
-	buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()),
-				  GFP_KERNEL);
-	if (!buffer->buffers)
+	buffer->buffers_pcpu = alloc_percpu(struct ring_buffer_per_cpu *);
+	if (!buffer->buffers_pcpu)
 		goto fail_free_cpumask;
 
 	for_each_buffer_cpu(buffer, cpu) {
-		buffer->buffers[cpu] =
+		*per_cpu_ptr(buffer->buffers_pcpu, cpu) =
 			rb_allocate_cpu_buffer(buffer, cpu);
-		if (!buffer->buffers[cpu])
+		if (!*per_cpu_ptr(buffer->buffers_pcpu, cpu))
 			goto fail_free_buffers;
 	}
 
@@ -496,10 +490,11 @@ struct ring_buffer *ring_buffer_alloc(un
 
  fail_free_buffers:
 	for_each_buffer_cpu(buffer, cpu) {
-		if (buffer->buffers[cpu])
-			rb_free_cpu_buffer(buffer->buffers[cpu]);
+		if (*per_cpu_ptr(buffer->buffers_pcpu, cpu))
+			rb_free_cpu_buffer(*per_cpu_ptr(buffer->buffers_pcpu,
+							cpu));
 	}
-	kfree(buffer->buffers);
+	free_percpu(buffer->buffers_pcpu);
 
  fail_free_cpumask:
 	free_cpumask_var(buffer->cpumask);
@@ -520,7 +515,7 @@ ring_buffer_free(struct ring_buffer *buf
 	int cpu;
 
 	for_each_buffer_cpu(buffer, cpu)
-		rb_free_cpu_buffer(buffer->buffers[cpu]);
+		rb_free_cpu_buffer(*per_cpu_ptr(buffer->buffers_pcpu, cpu));
 
 	free_cpumask_var(buffer->cpumask);
 
@@ -641,7 +636,7 @@ int ring_buffer_resize(struct ring_buffe
 		rm_pages = buffer->pages - nr_pages;
 
 		for_each_buffer_cpu(buffer, cpu) {
-			cpu_buffer = buffer->buffers[cpu];
+			cpu_buffer = *per_cpu_ptr(buffer->buffers_pcpu, cpu);
 			rb_remove_pages(cpu_buffer, rm_pages);
 		}
 		goto out;
@@ -679,7 +674,7 @@ int ring_buffer_resize(struct ring_buffe
 	}
 
 	for_each_buffer_cpu(buffer, cpu) {
-		cpu_buffer = buffer->buffers[cpu];
+		cpu_buffer = *per_cpu_ptr(buffer->buffers_pcpu, cpu);
 		rb_insert_pages(cpu_buffer, &pages, new_pages);
 	}
 
@@ -1300,7 +1295,7 @@ ring_buffer_lock_reserve(struct ring_buf
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		goto out;
 
-	cpu_buffer = buffer->buffers[cpu];
+	cpu_buffer = *per_cpu_ptr(buffer->buffers_pcpu, cpu);
 
 	if (atomic_read(&cpu_buffer->record_disabled))
 		goto out;
@@ -1360,7 +1355,7 @@ int ring_buffer_unlock_commit(struct rin
 	struct ring_buffer_per_cpu *cpu_buffer;
 	int cpu = raw_smp_processor_id();
 
-	cpu_buffer = buffer->buffers[cpu];
+	cpu_buffer = *per_cpu_ptr(buffer->buffers_pcpu, cpu);
 
 	rb_commit(cpu_buffer, event);
 
@@ -1413,7 +1408,7 @@ int ring_buffer_write(struct ring_buffer
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		goto out;
 
-	cpu_buffer = buffer->buffers[cpu];
+	cpu_buffer = *per_cpu_ptr(buffer->buffers_pcpu, cpu);
 
 	if (atomic_read(&cpu_buffer->record_disabled))
 		goto out;
@@ -1495,7 +1490,7 @@ void ring_buffer_record_disable_cpu(stru
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return;
 
-	cpu_buffer = buffer->buffers[cpu];
+	cpu_buffer = *per_cpu_ptr(buffer->buffers_pcpu, cpu);
 	atomic_inc(&cpu_buffer->record_disabled);
 }
 EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu);
@@ -1515,7 +1510,7 @@ void ring_buffer_record_enable_cpu(struc
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return;
 
-	cpu_buffer = buffer->buffers[cpu];
+	cpu_buffer = *per_cpu_ptr(buffer->buffers_pcpu, cpu);
 	atomic_dec(&cpu_buffer->record_disabled);
 }
 EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
@@ -1532,7 +1527,7 @@ unsigned long ring_buffer_entries_cpu(st
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return 0;
 
-	cpu_buffer = buffer->buffers[cpu];
+	cpu_buffer = *per_cpu_ptr(buffer->buffers_pcpu, cpu);
 	return cpu_buffer->entries;
 }
 EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu);
@@ -1549,7 +1544,7 @@ unsigned long ring_buffer_overrun_cpu(st
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return 0;
 
-	cpu_buffer = buffer->buffers[cpu];
+	cpu_buffer = *per_cpu_ptr(buffer->buffers_pcpu, cpu);
 	return cpu_buffer->overrun;
 }
 EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu);
@@ -1569,7 +1564,7 @@ unsigned long ring_buffer_entries(struct
 
 	/* if you care about this being correct, lock the buffer */
 	for_each_buffer_cpu(buffer, cpu) {
-		cpu_buffer = buffer->buffers[cpu];
+		cpu_buffer = *per_cpu_ptr(buffer->buffers_pcpu, cpu);
 		entries += cpu_buffer->entries;
 	}
 
@@ -1592,7 +1587,7 @@ unsigned long ring_buffer_overruns(struc
 
 	/* if you care about this being correct, lock the buffer */
 	for_each_buffer_cpu(buffer, cpu) {
-		cpu_buffer = buffer->buffers[cpu];
+		cpu_buffer = *per_cpu_ptr(buffer->buffers_pcpu, cpu);
 		overruns += cpu_buffer->overrun;
 	}
 
@@ -1867,7 +1862,7 @@ rb_buffer_peek(struct ring_buffer *buffe
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return NULL;
 
-	cpu_buffer = buffer->buffers[cpu];
+	cpu_buffer = *per_cpu_ptr(buffer->buffers_pcpu, cpu);
 
  again:
 	/*
@@ -1991,10 +1986,11 @@ struct ring_buffer_event *
 struct ring_buffer_event *
 ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 {
-	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
+	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event;
 	unsigned long flags;
 
+	cpu_buffer = *per_cpu_ptr(buffer->buffers_pcpu, cpu);
 	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 	event = rb_buffer_peek(buffer, cpu, ts);
 	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
@@ -2035,13 +2031,14 @@ struct ring_buffer_event *
 struct ring_buffer_event *
 ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
 {
-	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
+	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event;
 	unsigned long flags;
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return NULL;
 
+	cpu_buffer = *per_cpu_ptr(buffer->buffers_pcpu, cpu);
 	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 
 	event = rb_buffer_peek(buffer, cpu, ts);
@@ -2083,7 +2080,7 @@ ring_buffer_read_start(struct ring_buffe
 	if (!iter)
 		return NULL;
 
-	cpu_buffer = buffer->buffers[cpu];
+	cpu_buffer = *per_cpu_ptr(buffer->buffers_pcpu, cpu);
 
 	iter->cpu_buffer = cpu_buffer;
 
@@ -2186,11 +2183,13 @@ rb_reset_cpu(struct ring_buffer_per_cpu 
  */
 void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
 {
-	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
+	struct ring_buffer_per_cpu *cpu_buffer;
 	unsigned long flags;
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return;
+
+	cpu_buffer = *per_cpu_ptr(buffer->buffers_pcpu, cpu);
 
 	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 
@@ -2228,7 +2227,7 @@ int ring_buffer_empty(struct ring_buffer
 
 	/* yes this is racy, but if you don't like the race, lock the buffer */
 	for_each_buffer_cpu(buffer, cpu) {
-		cpu_buffer = buffer->buffers[cpu];
+		cpu_buffer = *per_cpu_ptr(buffer->buffers_pcpu, cpu);
 		if (!rb_per_cpu_empty(cpu_buffer))
 			return 0;
 	}
@@ -2248,7 +2247,7 @@ int ring_buffer_empty_cpu(struct ring_bu
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return 1;
 
-	cpu_buffer = buffer->buffers[cpu];
+	cpu_buffer = *per_cpu_ptr(buffer->buffers_pcpu, cpu);
 	return rb_per_cpu_empty(cpu_buffer);
 }
 EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu);
@@ -2277,8 +2276,8 @@ int ring_buffer_swap_cpu(struct ring_buf
 	if (buffer_a->pages != buffer_b->pages)
 		return -EINVAL;
 
-	cpu_buffer_a = buffer_a->buffers[cpu];
-	cpu_buffer_b = buffer_b->buffers[cpu];
+	cpu_buffer_a = *per_cpu_ptr(buffer_a->buffers_pcpu, cpu);
+	cpu_buffer_b = *per_cpu_ptr(buffer_b->buffers_pcpu, cpu);
 
 	/*
 	 * We can't do a synchronize_sched here because this
@@ -2289,8 +2288,8 @@ int ring_buffer_swap_cpu(struct ring_buf
 	atomic_inc(&cpu_buffer_a->record_disabled);
 	atomic_inc(&cpu_buffer_b->record_disabled);
 
-	buffer_a->buffers[cpu] = cpu_buffer_b;
-	buffer_b->buffers[cpu] = cpu_buffer_a;
+	*per_cpu_ptr(buffer_a->buffers_pcpu, cpu) = cpu_buffer_b;
+	*per_cpu_ptr(buffer_b->buffers_pcpu, cpu) = cpu_buffer_a;
 
 	cpu_buffer_b->buffer = buffer_a;
 	cpu_buffer_a->buffer = buffer_b;
@@ -2399,7 +2398,7 @@ int ring_buffer_read_page(struct ring_bu
 int ring_buffer_read_page(struct ring_buffer *buffer,
 			    void **data_page, int cpu, int full)
 {
-	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
+	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event;
 	struct buffer_data_page *bpage;
 	unsigned long flags;
@@ -2412,6 +2411,7 @@ int ring_buffer_read_page(struct ring_bu
 	if (!bpage)
 		return 0;
 
+	cpu_buffer = *per_cpu_ptr(buffer->buffers_pcpu, cpu);
 	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 
 	/*
