lguest: Tell guest net not to notify us on every packet xmit

virtio_ring has the ability to suppress notifications.  This prevents
a guest exit for every packet, but we need to set a timer on packet
receipt to re-check if there were any remaining packets.

Here are the times for 1G TCP Guest->Host with different timeout
settings (it matters because the TCP window doesn't grow big enough to
fill the entire buffer):

Timeout value		Seconds		Timeouts	Notifications
None (before)		30.1873		0		5627031
1000 usec		29.8546		207119		19
300 usec		18.2851		234960		1631
250 usec		17.5533		285292		1050
200 usec		17.7661		364157		2009
50 usec			22.4515		1690048		144327

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 Documentation/lguest/lguest.c |   86 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 83 insertions(+), 3 deletions(-)

diff -r e887ecd88c7f Documentation/lguest/lguest.c
--- a/Documentation/lguest/lguest.c	Wed Jun 04 10:35:16 2008 +1000
+++ b/Documentation/lguest/lguest.c	Wed Jun 04 16:48:49 2008 +1000
@@ -36,6 +36,7 @@
 #include <sched.h>
 #include <limits.h>
 #include <stddef.h>
+#include <signal.h>
 #include "linux/lguest_launcher.h"
 #include "linux/virtio_config.h"
 #include "linux/virtio_net.h"
@@ -81,6 +82,8 @@ static void *guest_base;
 static void *guest_base;
 /* The maximum guest physical address allowed, and maximum possible. */
 static unsigned long guest_limit, guest_max;
+/* The pipe for the signal handler to write to. */
+static int timeoutpipe[2];
 
 /* a per-cpu variable indicating whose vcpu is currently running */
 static unsigned int __thread cpu_id;
@@ -158,6 +161,9 @@ struct virtqueue
 
 	/* Outstanding buffers */
 	unsigned int inflight;
+
+	/* Is this blocked awaiting a timer? */
+	bool blocked;
 };
 
 /* Remember the arguments to the program so we can "reboot" */
@@ -882,6 +888,21 @@ static void handle_console_output(int fd
 	}
 }
 
+static void block_vq(struct virtqueue *vq)
+{
+	struct itimerval itm;
+
+	vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
+	vq->blocked = true;
+
+	itm.it_interval.tv_sec = 0;
+	itm.it_interval.tv_usec = 0;
+	itm.it_value.tv_sec = 0;
+	itm.it_value.tv_usec = 250;
+
+	setitimer(ITIMER_REAL, &itm, NULL);
+}
+
 /*
  * The Network
  *
@@ -891,7 +912,7 @@ static void handle_console_output(int fd
  */
 static void handle_net_output(int fd, struct virtqueue *vq)
 {
-	unsigned int head, out, in;
+	unsigned int head, out, in, num = 0;
 	int len;
 	struct iovec iov[vq->vring.num];
 
@@ -905,7 +926,12 @@ static void handle_net_output(int fd, st
 		(void)convert(&iov[0], struct virtio_net_hdr);
 		len = writev(vq->dev->fd, iov+1, out-1);
 		add_used_and_trigger(fd, vq, head, len);
+		num++;
 	}
+
+	/* Block further kicks, and set up a timer if we saw anything. */
+	if (num)
+		block_vq(vq);
 }
 
 /* This is where we handle a packet coming in from the tun device to our
@@ -1042,6 +1068,29 @@ static void handle_output(int fd, unsign
 	      strnlen(from_guest_phys(addr), guest_limit - addr));
 }
 
+static void handle_timeout(int fd)
+{
+	char buf[32];
+	struct device *i;
+	struct virtqueue *vq;
+
+	/* Clear the pipe */
+	read(timeoutpipe[0], buf, sizeof(buf));
+
+	/* Check each device and virtqueue: flush blocked ones. */
+	for (i = devices.dev; i; i = i->next) {
+		for (vq = i->vq; vq; vq = vq->next) {
+			if (!vq->blocked)
+				continue;
+
+			vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
+			vq->blocked = false;
+			if (vq->handle_output)
+				vq->handle_output(fd, vq);
+		}
+	}
+}
+
 /* This is called when the Waker wakes us up: check for incoming file
  * descriptors. */
 static void handle_input(int fd)
@@ -1052,9 +1101,14 @@ static void handle_input(int fd)
 	for (;;) {
 		struct device *i;
 		fd_set fds = devices.infds;
+		int num;
 
+		num = select(devices.max_infd+1, &fds, NULL, NULL, &poll);
+		/* Could get interrupted (e.g. by the timeout signal): retry. */
+		if (num < 0)
+			continue;
 		/* If nothing is ready, we're done. */
-		if (select(devices.max_infd+1, &fds, NULL, NULL, &poll) == 0)
+		if (num == 0)
 			break;
 
 		/* Otherwise, call the device(s) which have readable file
@@ -1078,6 +1132,10 @@ static void handle_input(int fd)
 				write(waker_fd, &dev_fd, sizeof(dev_fd));
 			}
 		}
+
+		/* Is this the timeout fd? */
+		if (FD_ISSET(timeoutpipe[0], &fds))
+			handle_timeout(fd);
 	}
 }
 
@@ -1141,6 +1199,7 @@ static void add_virtqueue(struct device 
 	vq->next = NULL;
 	vq->dev = dev;
 	vq->inflight = 0;
+	vq->blocked = false;
 
 	/* Initialize the configuration. */
 	vq->config.num = num_descs;
@@ -1278,6 +1337,24 @@ static void setup_console(void)
 	verbose("device %u: console\n", devices.device_num++);
 }
 /*:*/
+
+static void timeout_alarm(int sig)
+{
+	write(timeoutpipe[1], "", 1);
+}
+
+static void setup_timeout(void)
+{
+	if (pipe(timeoutpipe) != 0)
+		err(1, "Creating timeout pipe");
+
+	if (fcntl(timeoutpipe[1], F_SETFL,
+		  fcntl(timeoutpipe[1], F_GETFL) | O_NONBLOCK) != 0)
+		err(1, "Making timeout pipe nonblocking");
+
+	add_device_fd(timeoutpipe[0]);
+	signal(SIGALRM, timeout_alarm);
+}
 
 /*M:010 Inter-guest networking is an interesting area.  Simplest is to have a
  * --sharenet=<name> option which opens or creates a named pipe.  This can be
@@ -1754,7 +1831,7 @@ static void __attribute__((noreturn)) ru
 		/* ERESTART means that we need to reboot the guest */
 		} else if (errno == ERESTART) {
 			restart_guest();
-		/* EAGAIN means the Waker wanted us to look at some input.
+		/* EAGAIN means a signal (timeout).
 		 * Anything else means a bug or incompatible change. */
 		} else if (errno != EAGAIN)
 			err(1, "Running guest failed");
@@ -1878,6 +1955,9 @@ int main(int argc, char *argv[])
 	/* We always have a console device */
 	setup_console();
 
+	/* We can time out waiting for Guest network transmit. */
+	setup_timeout();
+
 	/* Now we load the kernel */
 	start = load_kernel(open_or_die(argv[optind+1], O_RDONLY));
 
