cpumask: convert sched.h

This converts sched.h, particularly the embedded cpumasks in struct sched_group (->cpumask, renamed to ->cpus) and struct sched_domain (->span).

Ideally these would become cpumask_var_t so they can be dynamically
allocated on large systems, rather than staying full NR_CPUS bitmaps,
but the code is hairy enough that I couldn't be sure the allocs would
be done in the right places.
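
For reference, a rough sketch of what that cpumask_var_t version could
look like (illustrative only, not part of this patch; getting the
alloc/free sites right is exactly the hairy part):

	/*
	 * cpumask_var_t is a pointer when CONFIG_CPUMASK_OFFSTACK=y, so the
	 * storage would have to be allocated wherever the groups and domains
	 * are built up, e.g.:
	 */
	struct sched_group {
		struct sched_group *next;	/* Must be a circular list */
		cpumask_var_t cpus;		/* instead of a full NR_CPUS bitmap */
		/* ... */
	};

	if (!alloc_cpumask_var(&sg->cpus, GFP_KERNEL))
		return -ENOMEM;
	/* ... with a matching free_cpumask_var(sg->cpus) on teardown. */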

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/sched.h |   20 ++++-----
 kernel/sched.c        |  108 +++++++++++++++++++++++++-------------------------
 kernel/sched_fair.c   |    5 +-
 kernel/sched_rt.c     |    3 -
 kernel/sched_stats.h  |    3 -
 5 files changed, 73 insertions(+), 66 deletions(-)
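
The conversion below is mechanical: the embedded cpumask_t members become
raw bitmaps, and every access goes through to_cpumask() plus the cpumask_*
accessor that replaces the old cpus_*/cpu_* call.  As a standalone sketch
of the pattern (not part of the patch; the struct and function names here
are made up for illustration):

	#include <linux/cpumask.h>

	struct demo_group {
		DECLARE_BITMAP(cpus, NR_CPUS);	/* was: cpumask_t cpumask; */
	};

	static int demo_group_has_cpu(struct demo_group *sg, int cpu)
	{
		/* to_cpumask() reinterprets the bitmap storage as a struct cpumask. */
		return cpumask_test_cpu(cpu, to_cpumask(sg->cpus));
	}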

diff -r a617ad8153c7 include/linux/sched.h
--- a/include/linux/sched.h	Sun Oct 05 13:21:53 2008 +1100
+++ b/include/linux/sched.h	Sun Oct 05 13:35:39 2008 +1100
@@ -734,7 +734,7 @@ enum cpu_idle_type {
 
 struct sched_group {
 	struct sched_group *next;	/* Must be a circular list */
-	cpumask_t cpumask;
+	DECLARE_BITMAP(cpus, NR_CPUS);
 
 	/*
 	 * CPU power of this group, SCHED_LOAD_SCALE being max power for a
@@ -772,7 +772,7 @@ struct sched_domain {
 	struct sched_domain *parent;	/* top domain must be null terminated */
 	struct sched_domain *child;	/* bottom domain must be null terminated */
 	struct sched_group *groups;	/* the balancing groups of the domain */
-	cpumask_t span;			/* span of all CPUs in this domain */
+	DECLARE_BITMAP(span, NR_CPUS);	/* span of all CPUs in this domain */
 	unsigned long min_interval;	/* Minimum balance interval ms */
 	unsigned long max_interval;	/* Maximum balance interval ms */
 	unsigned int busy_factor;	/* less balancing by factor if busy */
@@ -826,7 +827,7 @@ struct sched_domain {
 #endif
 };
 
-extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
+extern void partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
 				    struct sched_domain_attr *dattr_new);
 extern int arch_reinit_sched_domains(void);
 
@@ -835,7 +836,7 @@ struct sched_domain_attr;
 struct sched_domain_attr;
 
 static inline void
-partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
+partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
 			struct sched_domain_attr *dattr_new)
 {
 }
@@ -920,7 +921,7 @@ struct sched_class {
 	void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
 	void (*task_new) (struct rq *rq, struct task_struct *p);
 	void (*set_cpus_allowed)(struct task_struct *p,
-				 const cpumask_t *newmask);
+				 const struct cpumask *newmask);
 
 	void (*rq_online)(struct rq *rq);
 	void (*rq_offline)(struct rq *rq);
@@ -1539,12 +1540,12 @@ extern cputime_t task_gtime(struct task_
 
 #ifdef CONFIG_SMP
 extern int set_cpus_allowed_ptr(struct task_struct *p,
-				const cpumask_t *new_mask);
+				const struct cpumask *new_mask);
 #else
 static inline int set_cpus_allowed_ptr(struct task_struct *p,
-				       const cpumask_t *new_mask)
+				       const struct cpumask *new_mask)
 {
-	if (!cpu_isset(0, *new_mask))
+	if (!cpumask_test_cpu(0, new_mask))
 		return -EINVAL;
 	return 0;
 }
@@ -2131,8 +2132,8 @@ __trace_special(void *__tr, void *__data
 }
 #endif
 
-extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask);
-extern long sched_getaffinity(pid_t pid, cpumask_t *mask);
+extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
+extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
 
 extern int sched_mc_power_savings, sched_smt_power_savings;
 
diff -r a617ad8153c7 kernel/sched.c
--- a/kernel/sched.c	Sun Oct 05 13:21:53 2008 +1100
+++ b/kernel/sched.c	Sun Oct 05 13:35:39 2008 +1100
@@ -1493,7 +1493,7 @@ tg_shares_up(struct task_group *tg, int 
 	unsigned long shares = 0;
 	int i;
 
-	for_each_cpu(i, &sd->span) {
+	for_each_cpu(i, to_cpumask(sd->span)) {
 		rq_weight += tg->cfs_rq[i]->load.weight;
 		shares += tg->cfs_rq[i]->shares;
 	}
@@ -1505,9 +1505,9 @@ tg_shares_up(struct task_group *tg, int 
 		shares = tg->shares;
 
 	if (!rq_weight)
-		rq_weight = cpus_weight(sd->span) * NICE_0_LOAD;
-
-	for_each_cpu(i, &sd->span) {
+		rq_weight = cpumask_weight(to_cpumask(sd->span)) * NICE_0_LOAD;
+
+	for_each_cpu(i, to_cpumask(sd->span)) {
 		struct rq *rq = cpu_rq(i);
 		unsigned long flags;
 
@@ -2045,15 +2045,17 @@ find_idlest_group(struct sched_domain *s
 		int i;
 
 		/* Skip over this group if it has no CPUs allowed */
-		if (!cpus_intersects(group->cpumask, p->cpus_allowed))
-			continue;
-
-		local_group = cpu_isset(this_cpu, group->cpumask);
+		if (!cpumask_intersects(to_cpumask(group->cpus),
+					&p->cpus_allowed))
+			continue;
+
+		local_group = cpumask_test_cpu(this_cpu,
+					       to_cpumask(group->cpus));
 
 		/* Tally up the load of all CPUs in the group */
 		avg_load = 0;
 
-		for_each_cpu(i, &group->cpumask) {
+		for_each_cpu(i, to_cpumask(group->cpus)) {
 			/* Bias balancing toward cpus of our domain */
 			if (local_group)
 				load = source_load(i, load_idx);
@@ -2093,7 +2095,7 @@ find_idlest_cpu(struct sched_group *grou
 	int i;
 
 	/* Traverse only the allowed CPUs */
-	cpus_and(*tmp, group->cpumask, p->cpus_allowed);
+	cpumask_and(tmp, to_cpumask(group->cpus), &p->cpus_allowed);
 
 	for_each_cpu(i, tmp) {
 		load = weighted_cpuload(i);
@@ -2137,7 +2139,7 @@ static int sched_balance_self(int cpu, i
 		update_shares(sd);
 
 	while (sd) {
-		cpumask_t span, tmpmask;
+		cpumask_t tmpmask;
 		struct sched_group *group;
 		int new_cpu, weight;
 
@@ -2146,7 +2148,6 @@ static int sched_balance_self(int cpu, i
 			continue;
 		}
 
-		span = sd->span;
 		group = find_idlest_group(sd, t, cpu);
 		if (!group) {
 			sd = sd->child;
@@ -2162,10 +2163,10 @@ static int sched_balance_self(int cpu, i
 
 		/* Now try balancing at a lower domain level of new_cpu */
 		cpu = new_cpu;
+		weight = cpumask_weight(to_cpumask(sd->span));
 		sd = NULL;
-		weight = cpus_weight(span);
 		for_each_domain(cpu, tmp) {
-			if (weight <= cpus_weight(tmp->span))
+			if (weight <= cpumask_weight(to_cpumask(tmp->span)))
 				break;
 			if (tmp->flags & flag)
 				sd = tmp;
@@ -2210,7 +2211,7 @@ static int try_to_wake_up(struct task_st
 		cpu = task_cpu(p);
 
 		for_each_domain(this_cpu, sd) {
-			if (cpu_isset(cpu, sd->span)) {
+			if (cpumask_test_cpu(cpu, to_cpumask(sd->span))) {
 				update_shares(sd);
 				break;
 			}
@@ -2258,7 +2259,7 @@ static int try_to_wake_up(struct task_st
 	else {
 		struct sched_domain *sd;
 		for_each_domain(this_cpu, sd) {
-			if (cpu_isset(cpu, sd->span)) {
+			if (cpumask_test_cpu(cpu, to_cpumask(sd->span))) {
 				schedstat_inc(sd, ttwu_wake_remote);
 				break;
 			}
@@ -3108,10 +3109,11 @@ find_busiest_group(struct sched_domain *
 		unsigned long sum_avg_load_per_task;
 		unsigned long avg_load_per_task;
 
-		local_group = cpu_isset(this_cpu, group->cpumask);
+		local_group = cpumask_test_cpu(this_cpu,
+					       to_cpumask(group->cpus));
 
 		if (local_group)
-			balance_cpu = first_cpu(group->cpumask);
+			balance_cpu = cpumask_first(to_cpumask(group->cpus));
 
 		/* Tally up the load of all CPUs in the group */
 		sum_weighted_load = sum_nr_running = avg_load = 0;
@@ -3120,7 +3122,7 @@ find_busiest_group(struct sched_domain *
 		max_cpu_load = 0;
 		min_cpu_load = ~0UL;
 
-		for_each_cpu(i, &group->cpumask) {
+		for_each_cpu(i, to_cpumask(group->cpus)) {
 			struct rq *rq;
 
 			if (!cpu_isset(i, *cpus))
@@ -3237,8 +3239,8 @@ find_busiest_group(struct sched_domain *
 		 */
 		if ((sum_nr_running < min_nr_running) ||
 		    (sum_nr_running == min_nr_running &&
-		     first_cpu(group->cpumask) <
-		     first_cpu(group_min->cpumask))) {
+		     cpumask_first(to_cpumask(group->cpus)) <
+		     cpumask_first(to_cpumask(group_min->cpus)))) {
 			group_min = group;
 			min_nr_running = sum_nr_running;
 			min_load_per_task = sum_weighted_load /
@@ -3253,8 +3255,8 @@ find_busiest_group(struct sched_domain *
 		if (sum_nr_running <= group_capacity - 1) {
 			if (sum_nr_running > leader_nr_running ||
 			    (sum_nr_running == leader_nr_running &&
-			     first_cpu(group->cpumask) >
-			      first_cpu(group_leader->cpumask))) {
+			     cpumask_first(to_cpumask(group->cpus)) >
+			     cpumask_first(to_cpumask(group_leader->cpus)))) {
 				group_leader = group;
 				leader_nr_running = sum_nr_running;
 			}
@@ -3399,7 +3401,7 @@ find_busiest_queue(struct sched_group *g
 	unsigned long max_load = 0;
 	int i;
 
-	for_each_cpu(i, &group->cpumask) {
+	for_each_cpu(i, to_cpumask(group->cpus)) {
 		unsigned long wl;
 
 		if (!cpu_isset(i, *cpus))
@@ -3745,7 +3747,7 @@ static void active_load_balance(struct r
 	/* Search for an sd spanning us and the target CPU. */
 	for_each_domain(target_cpu, sd) {
 		if ((sd->flags & SD_LOAD_BALANCE) &&
-		    cpu_isset(busiest_cpu, sd->span))
+		    cpumask_test_cpu(busiest_cpu, to_cpumask(sd->span)))
 				break;
 	}
 
@@ -6579,7 +6581,7 @@ static int sched_domain_debug_one(struct
 	struct sched_group *group = sd->groups;
 	char str[256];
 
-	cpulist_scnprintf(str, sizeof(str), &sd->span);
+	cpulist_scnprintf(str, sizeof(str), to_cpumask(sd->span));
 	cpus_clear(*groupmask);
 
 	printk(KERN_DEBUG "%*s domain %d: ", level, "", level);
@@ -6595,11 +6597,11 @@ static int sched_domain_debug_one(struct
 	printk(KERN_CONT "span %s level %s\n",
 		str, sd_level_to_string(sd->level));
 
-	if (!cpu_isset(cpu, sd->span)) {
+	if (!cpumask_test_cpu(cpu, to_cpumask(sd->span))) {
 		printk(KERN_ERR "ERROR: domain->span does not contain "
 				"CPU%d\n", cpu);
 	}
-	if (!cpu_isset(cpu, group->cpumask)) {
+	if (!cpumask_test_cpu(cpu, to_cpumask(group->cpus))) {
 		printk(KERN_ERR "ERROR: domain->groups does not contain"
 				" CPU%d\n", cpu);
 	}
@@ -6619,31 +6621,32 @@ static int sched_domain_debug_one(struct
 			break;
 		}
 
-		if (!cpus_weight(group->cpumask)) {
+		if (!cpumask_weight(to_cpumask(group->cpus))) {
 			printk(KERN_CONT "\n");
 			printk(KERN_ERR "ERROR: empty group\n");
 			break;
 		}
 
-		if (cpus_intersects(*groupmask, group->cpumask)) {
+		if (cpumask_intersects(groupmask, to_cpumask(group->cpus))) {
 			printk(KERN_CONT "\n");
 			printk(KERN_ERR "ERROR: repeated CPUs\n");
 			break;
 		}
 
-		cpus_or(*groupmask, *groupmask, group->cpumask);
-
-		cpulist_scnprintf(str, sizeof(str), &group->cpumask);
+		cpumask_or(groupmask, groupmask, to_cpumask(group->cpus));
+
+		cpulist_scnprintf(str, sizeof(str), to_cpumask(group->cpus));
 		printk(KERN_CONT " %s", str);
 
 		group = group->next;
 	} while (group != sd->groups);
 	printk(KERN_CONT "\n");
 
-	if (!cpus_equal(sd->span, *groupmask))
+	if (!cpumask_equal(to_cpumask(sd->span), groupmask))
 		printk(KERN_ERR "ERROR: groups don't span domain->span\n");
 
-	if (sd->parent && !cpus_subset(*groupmask, sd->parent->span))
+	if (sd->parent &&
+	    !cpumask_subset(groupmask, to_cpumask(sd->parent->span)))
 		printk(KERN_ERR "ERROR: parent span is not a superset "
 			"of domain->span\n");
 	return 0;
@@ -6683,7 +6686,7 @@ static void sched_domain_debug(struct sc
 
 static int sd_degenerate(struct sched_domain *sd)
 {
-	if (cpus_weight(sd->span) == 1)
+	if (cpumask_weight(to_cpumask(sd->span)) == 1)
 		return 1;
 
 	/* Following flags need at least 2 groups */
@@ -6714,7 +6717,7 @@ sd_parent_degenerate(struct sched_domain
 	if (sd_degenerate(parent))
 		return 1;
 
-	if (!cpus_equal(sd->span, parent->span))
+	if (!cpumask_equal(to_cpumask(sd->span), to_cpumask(parent->span)))
 		return 0;
 
 	/* Does parent contain flags not in child? */
@@ -6876,7 +6879,7 @@ init_sched_build_groups(const cpumask_t 
 		if (cpu_isset(i, *covered))
 			continue;
 
-		cpus_clear(sg->cpumask);
+		cpumask_clear(to_cpumask(sg->cpus));
 		sg->__cpu_power = 0;
 
 		for_each_cpu(j, span) {
@@ -6884,7 +6887,7 @@ init_sched_build_groups(const cpumask_t 
 				continue;
 
 			cpu_set(j, *covered);
-			cpu_set(j, sg->cpumask);
+			cpumask_set_cpu(j, to_cpumask(sg->cpus));
 		}
 		if (!first)
 			first = sg;
@@ -7079,11 +7082,11 @@ static void init_numa_sched_groups_power
 	if (!sg)
 		return;
 	do {
-		for_each_cpu(j, &sg->cpumask) {
+		for_each_cpu(j, to_cpumask(sg->cpus)) {
 			struct sched_domain *sd;
 
 			sd = &per_cpu(phys_domains, j);
-			if (j != first_cpu(sd->groups->cpumask)) {
+			if (j != cpumask_first(to_cpumask(sd->groups->cpus))) {
 				/*
 				 * Only add "power" once for each
 				 * physical package.
@@ -7160,7 +7163,7 @@ static void init_sched_groups_power(int 
 
 	WARN_ON(!sd || !sd->groups);
 
-	if (cpu != first_cpu(sd->groups->cpumask))
+	if (cpu != cpumask_first(to_cpumask(sd->groups->cpus)))
 		return;
 
 	child = sd->child;
@@ -7324,7 +7327,7 @@ static int __build_sched_domains(const c
 			sd = &per_cpu(allnodes_domains, i);
 			SD_INIT(sd, ALLNODES);
 			set_domain_attribute(sd, attr);
-			sd->span = *cpu_map;
+			cpumask_copy(to_cpumask(sd->span), cpu_map);
 			cpu_to_allnodes_group(i, cpu_map, &sd->groups, tmpmask);
 			p = sd;
 			sd_allnodes = 1;
@@ -7334,18 +7337,19 @@ static int __build_sched_domains(const c
 		sd = &per_cpu(node_domains, i);
 		SD_INIT(sd, NODE);
 		set_domain_attribute(sd, attr);
-		sched_domain_node_span(cpu_to_node(i), &sd->span);
+		sched_domain_node_span(cpu_to_node(i), to_cpumask(sd->span));
 		sd->parent = p;
 		if (p)
 			p->child = sd;
-		cpus_and(sd->span, sd->span, *cpu_map);
+		cpumask_and(to_cpumask(sd->span),
+			    to_cpumask(sd->span), cpu_map);
 #endif
 
 		p = sd;
 		sd = &per_cpu(phys_domains, i);
 		SD_INIT(sd, CPU);
 		set_domain_attribute(sd, attr);
-		sd->span = *nodemask;
+		cpumask_copy(to_cpumask(sd->span), nodemask);
 		sd->parent = p;
 		if (p)
 			p->child = sd;
@@ -7356,8 +7360,8 @@ static int __build_sched_domains(const c
 		sd = &per_cpu(core_domains, i);
 		SD_INIT(sd, MC);
 		set_domain_attribute(sd, attr);
-		sd->span = *cpu_coregroup_map(i);
-		cpus_and(sd->span, sd->span, *cpu_map);
+		cpumask_and(to_cpumask(sd->span),
+			    cpu_coregroup_map(i), cpu_map);
 		sd->parent = p;
 		p->child = sd;
 		cpu_to_core_group(i, cpu_map, &sd->groups, tmpmask);
@@ -7368,8 +7372,8 @@ static int __build_sched_domains(const c
 		sd = &per_cpu(cpu_domains, i);
 		SD_INIT(sd, SIBLING);
 		set_domain_attribute(sd, attr);
-		sd->span = per_cpu(cpu_sibling_map, i);
-		cpus_and(sd->span, sd->span, *cpu_map);
+		cpumask_and(to_cpumask(sd->span),
+			    &per_cpu(cpu_sibling_map, i), cpu_map);
 		sd->parent = p;
 		p->child = sd;
 		cpu_to_cpu_group(i, cpu_map, &sd->groups, tmpmask);
@@ -7455,7 +7459,7 @@ static int __build_sched_domains(const c
 			sd->groups = sg;
 		}
 		sg->__cpu_power = 0;
-		sg->cpumask = *nodemask;
+		cpumask_copy(to_cpumask(sg->cpus), nodemask);
 		sg->next = sg;
 		cpus_or(*covered, *covered, *nodemask);
 		prev = sg;
@@ -7482,7 +7486,7 @@ static int __build_sched_domains(const c
 				goto error;
 			}
 			sg->__cpu_power = 0;
-			sg->cpumask = *tmpmask;
+			cpumask_copy(to_cpumask(sg->cpus), tmpmask);
 			sg->next = prev->next;
 			cpus_or(*covered, *covered, *tmpmask);
 			prev->next = sg;
diff -r a617ad8153c7 kernel/sched_fair.c
--- a/kernel/sched_fair.c	Sun Oct 05 13:21:53 2008 +1100
+++ b/kernel/sched_fair.c	Sun Oct 05 13:35:39 2008 +1100
@@ -1031,7 +1031,8 @@ static int wake_idle(int cpu, struct tas
 		if ((sd->flags & SD_WAKE_IDLE)
 		    || ((sd->flags & SD_WAKE_IDLE_FAR)
 			&& !task_hot(p, task_rq(p)->clock, sd))) {
-			cpus_and(tmp, sd->span, p->cpus_allowed);
+			cpumask_and(&tmp, to_cpumask(sd->span),
+				    &p->cpus_allowed);
 			cpus_and(tmp, tmp, cpu_active_map);
 			for_each_cpu_mask(i, tmp) {
 				if (idle_cpu(i)) {
@@ -1226,7 +1227,7 @@ static int select_task_rq_fair(struct ta
 	 * this_cpu and prev_cpu are present in:
 	 */
 	for_each_domain(this_cpu, sd) {
-		if (cpu_isset(prev_cpu, sd->span)) {
+		if (cpumask_test_cpu(prev_cpu, to_cpumask(sd->span))) {
 			this_sd = sd;
 			break;
 		}
diff -r a617ad8153c7 kernel/sched_rt.c
--- a/kernel/sched_rt.c	Sun Oct 05 13:21:53 2008 +1100
+++ b/kernel/sched_rt.c	Sun Oct 05 13:35:39 2008 +1100
@@ -969,7 +969,8 @@ static int find_lowest_rq(struct task_st
 			cpumask_t domain_mask;
 			int       best_cpu;
 
-			cpus_and(domain_mask, sd->span, *lowest_mask);
+			cpumask_and(&domain_mask,
+				    to_cpumask(sd->span), lowest_mask);
 
 			best_cpu = pick_optimal_cpu(this_cpu,
 						    &domain_mask);
diff -r a617ad8153c7 kernel/sched_stats.h
--- a/kernel/sched_stats.h	Sun Oct 05 13:21:53 2008 +1100
+++ b/kernel/sched_stats.h	Sun Oct 05 13:35:39 2008 +1100
@@ -42,7 +42,8 @@ static int show_schedstat(struct seq_fil
 		for_each_domain(cpu, sd) {
 			enum cpu_idle_type itype;
 
-			cpumask_scnprintf(mask_str, mask_len, &sd->span);
+			cpumask_scnprintf(mask_str, mask_len,
+					  to_cpumask(sd->span));
 			seq_printf(seq, "domain%d %s", dcount++, mask_str);
 			for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES;
 					itype++) {
