---
 kernel/stop_machine.c |   91 +++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 86 insertions(+), 5 deletions(-)

diff -r c488e003d8a2 kernel/stop_machine.c
--- a/kernel/stop_machine.c	Mon Jun 30 15:23:09 2008 +1000
+++ b/kernel/stop_machine.c	Tue Jul 01 11:27:52 2008 +1000
@@ -12,6 +12,8 @@
 
 #include <asm/atomic.h>
 #include <asm/uaccess.h>
+#include <asm/ptrace.h>
+#include <asm/irq_regs.h>
 
 /* This controls the threads on each CPU. */
 enum stopmachine_state {
@@ -106,15 +108,15 @@ int __stop_machine_run(int (*fn)(void *)
 	struct stop_machine_data active, idle;
 	struct task_struct **threads;
 
+	/* If they don't care which cpu fn runs on, just pick one. */
+	if (cpu == NR_CPUS)
+		cpu = any_online_cpu(cpu_online_map);
+
 	active.fn = fn;
 	active.data = data;
 	active.fnret = 0;
 	idle.fn = chill;
 	idle.data = NULL;
-
-	/* If they don't care which cpu fn runs on, just pick one. */
-	if (cpu == NR_CPUS)
-		cpu = any_online_cpu(cpu_online_map);
 
 	/* This could be too big for stack on large machines. */
 	threads = kcalloc(NR_CPUS, sizeof(threads[0]), GFP_KERNEL);
@@ -176,13 +178,92 @@ kill_threads:
 	return err;
 }
 
+struct ipi_data {
+	atomic_t acked;		/* bumped once by each cpu running ipi_func() */
+	atomic_t failed;	/* bumped by each cpu whose context check fails */
+	unsigned int cpu;	/* the cpu that calls fn */
+	int fnret;		/* fn's return value; written only if fn is called */
+	int (*fn)(void *data);	/* called on .cpu, and only if failed stays 0 */
+	void *data;		/* argument handed to fn */
+};
+
+static void ipi_func(void *info)	/* info: the shared struct ipi_data */
+{
+	struct ipi_data *ipi = info;
+	bool ok = false;	/* set if the interrupted context passes below */
+
+	if (user_mode(get_irq_regs()))
+		ok = true;
+	else {	/* kernel mode: stays !ok unless the CONFIG_PREEMPT check passes */
+#ifdef CONFIG_PREEMPT
+		/* Interrupted in kernel mode: ok only if this is the sole
+		 * hardirq level and we were preemptible before that. */
+		if ((hardirq_count() >> HARDIRQ_SHIFT) == 1) {
+			int prev = preempt_count() & ~HARDIRQ_MASK;
+			if ((prev & ~PREEMPT_ACTIVE) == PREEMPT_INATOMIC_BASE)
+				ok = true;
+		}
+#endif
+	}
+
+	if (!ok) {
+		/* Publish our failure before acking (pairs with rmb below). */
+		atomic_inc(&ipi->failed);
+		wmb();
+	}
+
+	if (smp_processor_id() != ipi->cpu) {
+		/* Ack, then spin until ipi->cpu (last to ack) has finished. */
+		atomic_inc(&ipi->acked);
+		while (atomic_read(&ipi->acked) != num_online_cpus())
+			cpu_relax();
+	} else {
+		while (atomic_read(&ipi->acked) != num_online_cpus() - 1)
+			cpu_relax();
+		/* Must read acked before failed. */
+		rmb();
+
+		/* Call function only if no cpu reported failure. */
+		if (atomic_read(&ipi->failed) == 0)
+			ipi->fnret = ipi->fn(ipi->data);
+		atomic_inc(&ipi->acked);
+	}
+}
+
+static bool try_ipi_stop(int (*fn)(void *), void *data, unsigned int cpu,
+			 int *ret)	/* *ret gets fn's result (0 if not run) */
+{
+	struct ipi_data ipi;
+
+	/* If they don't care which cpu fn runs on, just pick one. */
+	if (cpu == NR_CPUS)
+		ipi.cpu = any_online_cpu(cpu_online_map);
+	else
+		ipi.cpu = cpu;
+
+	atomic_set(&ipi.acked, 0);
+	atomic_set(&ipi.failed, 0);
+	ipi.fn = fn;
+	ipi.data = data;
+	ipi.fnret = 0;
+	/* NOTE(review): ipi_func() counts every online cpu; caller too? */
+	smp_call_function(ipi_func, &ipi, 0, 1);
+
+	printk(KERN_DEBUG "stop_machine: ipi acked %d failed %d\n",
+	       atomic_read(&ipi.acked), atomic_read(&ipi.failed));
+	*ret = ipi.fnret;
+	return (atomic_read(&ipi.failed) == 0);	/* true: no cpu failed */
+}
+
 int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
 {
 	int ret;
 
 	/* No CPUs can come up or down during this. */
 	get_online_cpus();
-	ret = __stop_machine_run(fn, data, cpu);
+	/* Opportunistic: this often works. */
+	if (!try_ipi_stop(fn, data, cpu, &ret))
+		ret = __stop_machine_run(fn, data, cpu);
 	put_online_cpus();
 
 	return ret;
