virtio: threshold for notifications

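When the host suppresses notifications (VRING_USED_F_NO_NOTIFY), the
guest exits less often, which improves performance.  With this patch
the guest still notifies the host once the number of outstanding
buffers crosses a threshold, even while notifications are suppressed.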

If we use a static threshold, we get the following results for 1G TCP
Guest->Host xmit using lguest:

Threshold		Seconds		Timeouts	Notifies
10			19.3884		131023		888650
20			18.4506		364787		696192
30			17.1893		260382		431136
40			17.172		279006		1112
50			17.0873		282943		816
60			16.7904		282645		955
70			16.6979		283811		1144
100			16.5922		280790		1039
128			17.3227		283651		966

Dynamic			17.4682		275860		64153
			16.8616		291491		1270




Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 Documentation/lguest/lguest.c |    9 +++++++
 drivers/virtio/virtio_ring.c  |   48 ++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 55 insertions(+), 2 deletions(-)

diff -r 54fcc5b8bead Documentation/lguest/lguest.c
--- a/Documentation/lguest/lguest.c	Tue Jun 10 16:58:05 2008 +1000
+++ b/Documentation/lguest/lguest.c	Wed Jun 11 10:06:45 2008 +1000
@@ -75,6 +75,8 @@ static bool verbose;
 #define verbose(args...) \
 	do { if (verbose) printf(args); } while(0)
 /*:*/
+
+static unsigned int num_timeouts, num_net_notifies; /* printed with %u */
 
 /* The pipe to send commands to the waker process */
 static int waker_fd;
@@ -859,6 +861,10 @@ static bool handle_console_input(int fd,
 				/* Just in case waker is blocked in BREAK, send
 				 * unbreak now. */
 				write(fd, args, sizeof(args));
+				printf("Number of net notifies: %u\n",
+				       num_net_notifies - num_timeouts);
+				printf("Number of timeouts: %u\n",
+				       num_timeouts);
 				exit(2);
 			}
 			abort->count = 0;
@@ -915,6 +921,8 @@ static void handle_net_output(int fd, st
 	unsigned int head, out, in, num = 0;
 	int len;
 	struct iovec iov[vq->vring.num];
+
+	num_net_notifies++;
 
 	/* Keep getting output buffers from the Guest until we run out. */
 	while ((head = get_vq_desc(vq, iov, &out, &in)) != vq->vring.num) {
@@ -1340,6 +1348,7 @@ static void setup_console(void)
 
 static void timeout_alarm(int sig)
 {
+	num_timeouts++;	/* Statistic: count every timeout_alarm() call. */
 	write(timeoutpipe[1], "", 1);
 }
 
diff -r 54fcc5b8bead drivers/virtio/virtio_ring.c
--- a/drivers/virtio/virtio_ring.c	Tue Jun 10 16:58:05 2008 +1000
+++ b/drivers/virtio/virtio_ring.c	Wed Jun 11 10:06:45 2008 +1000
@@ -53,6 +53,11 @@ struct vring_virtqueue
 	/* Number we've added since last sync. */
 	unsigned int num_added;
 
+	/* Previous number of outstanding buffers. */
+	unsigned int last_outstanding;
+	/* Expected high watermark: where we should force notify host. */
+	unsigned int threshold;
+
 	/* How to notify other side. FIXME: commonalize hcalls! */
 	void (*notify)(struct virtqueue *vq);
 
@@ -66,6 +71,13 @@ struct vring_virtqueue
 };
 
 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
+
+static unsigned outstanding_buffers(struct vring_virtqueue *vq)
+{
+	/* Indices are free-running 16-bit counters: a u16 subtraction copes
+	 * with wrap and reports a completely full ring as num, not 0. */
+	return (u16)(vq->vring.avail->idx - vq->vring.used->idx);
+}
 
 static int vring_add_buf(struct virtqueue *_vq,
 			 struct scatterlist sg[],
@@ -87,6 +99,12 @@ static int vring_add_buf(struct virtqueu
 			 out + in, vq->num_free);
 		/* We notify *even if* VRING_USED_F_NO_NOTIFY is set here. */
 		vq->notify(&vq->vq);
+		/* This implies we should have prodded other side earlier */
+		if (vq->vring.used->flags & VRING_USED_F_NO_NOTIFY) {
+			if (vq->threshold)
+				vq->threshold--;
+		}
+
 		END_USE(vq);
 		return -ENOSPC;
 	}
@@ -131,18 +149,42 @@ static void vring_kick(struct virtqueue 
 static void vring_kick(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
+	unsigned int added, obufs = outstanding_buffers(vq);
+
 	START_USE(vq);
 	/* Descriptors and available array need to be set before we expose the
 	 * new available array entries. */
 	wmb();
 
-	vq->vring.avail->idx += vq->num_added;
+	added = vq->num_added;
+	vq->vring.avail->idx += added;
 	vq->num_added = 0;
 
 	/* Need to update avail index before checking if we should notify */
 	mb();
 
-	if (!(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY))
+	if (vq->vring.used->flags & VRING_USED_F_NO_NOTIFY) {
+		/* Doesn't want to know?  Prod as we pass threshold. */
+		if (obufs < vq->threshold && obufs + added >= vq->threshold)
+			vq->notify(&vq->vq);
+
+		obufs += added;
+
+		/* Did we just pass a peak?  Fold it into threshold. */
+		if (obufs < vq->last_outstanding) {
+			/* Peak over threshold?  Increase towards max. */
+			if (vq->last_outstanding >= vq->threshold) {
+				vq->threshold = (vq->last_outstanding
+						 + vq->vring.num) / 2;
+			} else {
+				/* Decay threshold a little. */
+				vq->threshold -= vq->threshold / 8;
+				vq->threshold += vq->last_outstanding / 8;
+			}
+			printk("Threshold = %u\n", vq->threshold);
+		}
+		vq->last_outstanding = obufs;
+	} else
 		/* Prod other side to tell it about changes. */
 		vq->notify(&vq->vq);
 
@@ -297,6 +339,8 @@ struct virtqueue *vring_new_virtqueue(un
 	vq->notify = notify;
 	vq->broken = false;
 	vq->num_added = 0;
+	vq->threshold = num/2;
+	vq->last_outstanding = 0;
 #ifdef DEBUG
 	vq->in_use = false;
 #endif
