cpumask: use percpu allocations instead of arrays in ebtables

Instead of allocating an nr_cpu_ids-sized array, most places should be
using alloc_percpu().  This doesn't waste space if cpu numbers aren't
contiguous.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/netfilter_bridge/ebtables.h |    2 -
 net/bridge/netfilter/ebtables.c           |   46 ++++++++++++++++--------------
 2 files changed, 26 insertions(+), 22 deletions(-)

diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -277,7 +277,7 @@ struct ebt_table_info
 	/* pointers to the start of the chains */
 	struct ebt_entries *hook_entry[NF_BR_NUMHOOKS];
 	/* room to maintain the stack used for jumping from and into udc */
-	struct ebt_chainstack **chainstack;
+	struct ebt_chainstack **chainstack_pcpu;
 	char *entries;
 	struct ebt_counter counters[0] ____cacheline_aligned;
 };
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -861,18 +861,22 @@ static int translate_table(char *name, s
 	if (udc_cnt) {
 		/* this will get free'd in do_replace()/ebt_register_table()
 		   if an error occurs */
-		newinfo->chainstack =
-			vmalloc(nr_cpu_ids * sizeof(*(newinfo->chainstack)));
-		if (!newinfo->chainstack)
+		newinfo->chainstack_pcpu = alloc_percpu(struct ebt_chainstack **);
+		if (!newinfo->chainstack_pcpu)
 			return -ENOMEM;
 		for_each_possible_cpu(i) {
-			newinfo->chainstack[i] =
-			  vmalloc(udc_cnt * sizeof(*(newinfo->chainstack[0])));
-			if (!newinfo->chainstack[i]) {
-				while (i)
-					vfree(newinfo->chainstack[--i]);
-				vfree(newinfo->chainstack);
-				newinfo->chainstack = NULL;
+			*per_cpu_ptr(newinfo->chainstack, i) =
+			  vmalloc(udc_cnt * sizeof(*(newinfo->chainstack_pcpu[0])));
+			if (!*per_cpu_ptr(newinfo->chainstack, i)) {
+				int j;
+				for_each_possible_cpu(j) {
+					if (j == i)
+						break;
+					vfree(*per_cpu_ptr(newinfo->chainstack_pcpu,
+							  j));
+				}
+				free_percpu(newinfo->chainstack_pcpu);
+				newinfo->chainstack_pcpu = NULL;
 				return -ENOMEM;
 			}
 		}
@@ -1008,7 +1012,7 @@ static int do_replace(struct net *net, v
 		counterstmp = NULL;
 
 	/* this can get initialized by translate_table() */
-	newinfo->chainstack = NULL;
+	newinfo->chainstack_pcpu = NULL;
 	ret = ebt_verify_pointers(&tmp, newinfo);
 	if (ret != 0)
 		goto free_counterstmp;
@@ -1069,10 +1073,10 @@ static int do_replace(struct net *net, v
 	   ebt_cleanup_entry, NULL);
 
 	vfree(table->entries);
-	if (table->chainstack) {
+	if (table->chainstack_pcpu) {
 		for_each_possible_cpu(i)
-			vfree(table->chainstack[i]);
-		vfree(table->chainstack);
+			vfree(*per_cpu_ptr(newinfo->chainstack_pcpu, i));
+		free_percpu(table->chainstack_pcpu);
 	}
 	vfree(table);
 
@@ -1087,10 +1091,10 @@ free_counterstmp:
 free_counterstmp:
 	vfree(counterstmp);
 	/* can be initialized in translate_table() */
-	if (newinfo->chainstack) {
+	if (newinfo->chainstack_pcpu) {
 		for_each_possible_cpu(i)
-			vfree(newinfo->chainstack[i]);
-		vfree(newinfo->chainstack);
+			vfree(*per_cpu_ptr(newinfo->chainstack_pcpu, i));
+		free_percpu(table->chainstack_pcpu);
 	}
 free_entries:
 	vfree(newinfo->entries);
@@ -1187,8 +1191,8 @@ free_chainstack:
 free_chainstack:
 	if (newinfo->chainstack) {
 		for_each_possible_cpu(i)
-			vfree(newinfo->chainstack[i]);
-		vfree(newinfo->chainstack);
+			vfree(*per_cpu_ptr(newinfo->chainstack_pcpu, i));
+		free_percpu(table->chainstack_pcpu);
 	}
 	vfree(newinfo->entries);
 free_newinfo:
@@ -1217,8 +1221,8 @@ void ebt_unregister_table(struct ebt_tab
 	vfree(table->private->entries);
 	if (table->private->chainstack) {
 		for_each_possible_cpu(i)
-			vfree(table->private->chainstack[i]);
-		vfree(table->private->chainstack);
+			vfree(*per_cpu_ptr(newinfo->chainstack_pcpu, i));
+		free_percpu(table->chainstack_pcpu);
 	}
 	vfree(table->private);
 	kfree(table);
