hrtimers: add slop parameter

After discussion with Thomas Gleixner, we came up with the idea of
introducing a new parameter to hrtimers (and probably eventually all
timers in the kernel, then onto userspace).  I call it "slop", and it
is an indication of how precise a timer has to be.

The idea is that this "slop" can be used to calculate which timers can
be batched, and eventually even to unify normal and high-res timers.

For this patch, DEFAULT_SLOP (currently 0) is used everywhere.  The
value is only recorded in the hrtimer (when CONFIG_HIGH_RES_TIMERS is
set); nothing acts on it yet.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff -r 58aeae025f3f drivers/kvm/lapic.c
--- a/drivers/kvm/lapic.c	Wed Dec 19 16:06:02 2007 +1100
+++ b/drivers/kvm/lapic.c	Wed Dec 19 16:06:41 2007 +1100
@@ -968,7 +968,8 @@ int kvm_create_lapic(struct kvm_vcpu *vc
 	memset(apic->regs, 0, PAGE_SIZE);
 	apic->vcpu = vcpu;
 
-	hrtimer_init(&apic->timer.dev, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	hrtimer_init(&apic->timer.dev, CLOCK_MONOTONIC, HRTIMER_MODE_ABS,
+		     DEFAULT_SLOP);
 	apic->timer.dev.function = apic_timer_fn;
 	apic->base_address = APIC_DEFAULT_PHYS_BASE;
 	vcpu->apic_base = APIC_DEFAULT_PHYS_BASE;
diff -r 58aeae025f3f drivers/lguest/interrupts_and_traps.c
--- a/drivers/lguest/interrupts_and_traps.c	Wed Dec 19 16:06:02 2007 +1100
+++ b/drivers/lguest/interrupts_and_traps.c	Wed Dec 19 16:06:41 2007 +1100
@@ -503,6 +503,6 @@ static enum hrtimer_restart clockdev_fn(
 /* This sets up the timer for this Guest. */
 void init_clockdev(struct lguest *lg)
 {
-	hrtimer_init(&lg->hrt, CLOCK_REALTIME, HRTIMER_MODE_ABS);
+	hrtimer_init(&lg->hrt, CLOCK_REALTIME, HRTIMER_MODE_ABS, DEFAULT_SLOP);
 	lg->hrt.function = clockdev_fn;
 }
diff -r 58aeae025f3f drivers/net/virtio_net.c
--- a/drivers/net/virtio_net.c	Wed Dec 19 16:06:02 2007 +1100
+++ b/drivers/net/virtio_net.c	Wed Dec 19 16:06:41 2007 +1100
@@ -520,7 +520,8 @@ static int virtnet_probe(struct virtio_d
 	netif_napi_add(dev, &vi->napi, virtnet_poll, 16);
 	vi->dev = dev;
 	vi->vdev = vdev;
-	hrtimer_init(&vi->tx_timer, CLOCK_REALTIME, HRTIMER_MODE_REL);
+	hrtimer_init(&vi->tx_timer, CLOCK_REALTIME, HRTIMER_MODE_REL,
+		     DEFAULT_SLOP);
 	vi->tx_timer.function = kick_xmit;
 	vi->tx_timer.cb_mode = HRTIMER_CB_SOFTIRQ;
 	vi->out_max = -1U;
diff -r 58aeae025f3f include/linux/hrtimer.h
--- a/include/linux/hrtimer.h	Wed Dec 19 16:06:02 2007 +1100
+++ b/include/linux/hrtimer.h	Wed Dec 19 16:06:41 2007 +1100
@@ -100,6 +100,7 @@ enum hrtimer_cb_mode {
  * @cb_mode:	high resolution timer feature to select the callback execution
  *		 mode
  * @cb_entry:	list head to enqueue an expired timer into the callback list
+ * @slop:	how much extra delay can be added (e.g. for deferring wakeups)
  * @start_site:	timer statistics field to store the site where the timer
  *		was started
  * @start_comm: timer statistics field to store the name of the process which
@@ -118,6 +119,7 @@ struct hrtimer {
 #ifdef CONFIG_HIGH_RES_TIMERS
 	enum hrtimer_cb_mode		cb_mode;
 	struct list_head		cb_entry;
+	ktime_t				slop;
 #endif
 #ifdef CONFIG_TIMER_STATS
 	void				*start_site;
@@ -256,8 +258,9 @@ extern ktime_t ktime_get_real(void);
 /* Exported timer functions: */
 
 /* Initialize timers: */
+#define DEFAULT_SLOP ((ktime_t) { .tv64 = 0 })
 extern void hrtimer_init(struct hrtimer *timer, clockid_t which_clock,
-			 enum hrtimer_mode mode);
+			 enum hrtimer_mode mode, ktime_t slop);
 
 /* Basic timer operations: */
 extern int hrtimer_start(struct hrtimer *timer, ktime_t tim,
diff -r 58aeae025f3f kernel/fork.c
--- a/kernel/fork.c	Wed Dec 19 16:06:02 2007 +1100
+++ b/kernel/fork.c	Wed Dec 19 16:06:41 2007 +1100
@@ -874,7 +874,8 @@ static int copy_signal(unsigned long clo
 	init_sigpending(&sig->shared_pending);
 	INIT_LIST_HEAD(&sig->posix_timers);
 
-	hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL,
+		     DEFAULT_SLOP);
 	sig->it_real_incr.tv64 = 0;
 	sig->real_timer.function = it_real_fn;
 	sig->tsk = tsk;
diff -r 58aeae025f3f kernel/futex.c
--- a/kernel/futex.c	Wed Dec 19 16:06:02 2007 +1100
+++ b/kernel/futex.c	Wed Dec 19 16:06:41 2007 +1100
@@ -1247,7 +1247,8 @@ static int futex_wait(u32 __user *uaddr,
 		if (!abs_time)
 			schedule();
 		else {
-			hrtimer_init(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+			hrtimer_init(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS,
+				     DEFAULT_SLOP);
 			hrtimer_init_sleeper(&t, current);
 			t.timer.expires = *abs_time;
 
@@ -1344,7 +1345,8 @@ static int futex_lock_pi(u32 __user *uad
 
 	if (time) {
 		to = &timeout;
-		hrtimer_init(&to->timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
+		hrtimer_init(&to->timer, CLOCK_REALTIME, HRTIMER_MODE_ABS,
+			     DEFAULT_SLOP);
 		hrtimer_init_sleeper(to, current);
 		to->timer.expires = *time;
 	}
diff -r 58aeae025f3f kernel/hrtimer.c
--- a/kernel/hrtimer.c	Wed Dec 19 16:06:02 2007 +1100
+++ b/kernel/hrtimer.c	Wed Dec 19 16:06:41 2007 +1100
@@ -522,9 +522,10 @@ static inline void hrtimer_init_hres(str
 /*
  * Initialize the high resolution related parts of a hrtimer
  */
-static inline void hrtimer_init_timer_hres(struct hrtimer *timer)
+static inline void hrtimer_init_timer_hres(struct hrtimer *timer, ktime_t slop)
 {
 	INIT_LIST_HEAD(&timer->cb_entry);
+	timer->slop = slop;
 }
 
 /*
@@ -621,7 +622,9 @@ static inline int hrtimer_cb_pending(str
 static inline int hrtimer_cb_pending(struct hrtimer *timer) { return 0; }
 static inline void hrtimer_remove_cb_pending(struct hrtimer *timer) { }
 static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
-static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { }
+static inline void hrtimer_init_timer_hres(struct hrtimer *timer, ktime_t slop)
+{
+}
 
 #endif /* CONFIG_HIGH_RES_TIMERS */
 
@@ -987,9 +990,10 @@ ktime_t hrtimer_get_next_event(void)
  * @timer:	the timer to be initialized
  * @clock_id:	the clock to be used
  * @mode:	timer mode abs/rel
+ * @slop:	delay which can be added to timer without significant effect.
  */
 void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
-		  enum hrtimer_mode mode)
+		  enum hrtimer_mode mode, ktime_t slop)
 {
 	struct hrtimer_cpu_base *cpu_base;
 
@@ -1001,7 +1005,7 @@ void hrtimer_init(struct hrtimer *timer,
 		clock_id = CLOCK_MONOTONIC;
 
 	timer->base = &cpu_base->clock_base[clock_id];
-	hrtimer_init_timer_hres(timer);
+	hrtimer_init_timer_hres(timer, slop);
 
 #ifdef CONFIG_TIMER_STATS
 	timer->start_site = NULL;
@@ -1299,7 +1303,7 @@ long __sched hrtimer_nanosleep_restart(s
 
 	restart->fn = do_no_restart_syscall;
 
-	hrtimer_init(&t.timer, restart->arg0, HRTIMER_MODE_ABS);
+	hrtimer_init(&t.timer, restart->arg0, HRTIMER_MODE_ABS, DEFAULT_SLOP);
 	t.timer.expires.tv64 = ((u64)restart->arg3 << 32) | (u64) restart->arg2;
 
 	if (do_nanosleep(&t, HRTIMER_MODE_ABS))
@@ -1326,7 +1330,7 @@ long hrtimer_nanosleep(struct timespec *
 	struct hrtimer_sleeper t;
 	ktime_t rem;
 
-	hrtimer_init(&t.timer, clockid, mode);
+	hrtimer_init(&t.timer, clockid, mode, DEFAULT_SLOP);
 	t.timer.expires = timespec_to_ktime(*rqtp);
 	if (do_nanosleep(&t, mode))
 		return 0;
diff -r 58aeae025f3f kernel/posix-timers.c
--- a/kernel/posix-timers.c	Wed Dec 19 16:06:02 2007 +1100
+++ b/kernel/posix-timers.c	Wed Dec 19 16:06:41 2007 +1100
@@ -194,7 +194,8 @@ static inline int common_clock_set(const
 
 static int common_timer_create(struct k_itimer *new_timer)
 {
-	hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock, 0);
+	hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock, 0,
+		     DEFAULT_SLOP);
 	return 0;
 }
 
@@ -755,7 +756,7 @@ common_timer_set(struct k_itimer *timr, 
 		return 0;
 
 	mode = flags & TIMER_ABSTIME ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL;
-	hrtimer_init(&timr->it.real.timer, timr->it_clock, mode);
+	hrtimer_init(&timr->it.real.timer, timr->it_clock, mode, DEFAULT_SLOP);
 	timr->it.real.timer.function = posix_timer_fn;
 
 	timer->expires = timespec_to_ktime(new_setting->it_value);
diff -r 58aeae025f3f kernel/time/tick-sched.c
--- a/kernel/time/tick-sched.c	Wed Dec 19 16:06:02 2007 +1100
+++ b/kernel/time/tick-sched.c	Wed Dec 19 16:06:41 2007 +1100
@@ -475,7 +475,8 @@ static void tick_nohz_switch_to_nohz(voi
 	 * Recycle the hrtimer in ts, so we can share the
 	 * hrtimer_forward with the highres code.
 	 */
-	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS,
+		     DEFAULT_SLOP);
 	/* Get the next period */
 	next = tick_init_jiffy_update();
 
@@ -579,7 +580,8 @@ void tick_setup_sched_timer(void)
 	/*
 	 * Emulate tick processing via per-CPU hrtimers:
 	 */
-	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS,
+		     DEFAULT_SLOP);
 	ts->sched_timer.function = tick_sched_timer;
 	ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
 
diff -r 58aeae025f3f net/sched/sch_api.c
--- a/net/sched/sch_api.c	Wed Dec 19 16:06:02 2007 +1100
+++ b/net/sched/sch_api.c	Wed Dec 19 16:06:41 2007 +1100
@@ -286,7 +286,8 @@ static enum hrtimer_restart qdisc_watchd
 
 void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
 {
-	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS,
+		     DEFAULT_SLOP);
 	wd->timer.function = qdisc_watchdog;
 	wd->qdisc = qdisc;
 }
