tun: vringfd receive support.

This patch modifies tun to allow a vringfd to specify the receive
buffer.  Because we can't copy to userspace in bh context, we queue
packets as usual, then use the vring "pull" hook to actually do the copy.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/net/Kconfig    |    2 
 drivers/net/tun.c      |  125 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/if_tun.h |    1 
 3 files changed, 128 insertions(+)

diff -r 361350b8c366 Documentation/lguest/lguest
Binary file Documentation/lguest/lguest has changed

diff -r aae987ebf293 drivers/net/Kconfig
--- a/drivers/net/Kconfig	Mon Aug 25 10:13:25 2008 +1000
+++ b/drivers/net/Kconfig	Mon Aug 25 10:14:35 2008 +1000
@@ -112,6 +112,8 @@ config TUN
 config TUN
 	tristate "Universal TUN/TAP device driver support"
 	select CRC32
+# VRING is optional, but if it is built as a module, TUN must be one too.
+	depends on !VRING || VRING
 	---help---
 	  TUN/TAP provides packet reception and transmission for user space
 	  programs.  It can be viewed as a simple Point-to-Point or Ethernet
diff -r aae987ebf293 drivers/net/tun.c
--- a/drivers/net/tun.c	Mon Aug 25 10:13:25 2008 +1000
+++ b/drivers/net/tun.c	Mon Aug 25 10:14:35 2008 +1000
@@ -61,6 +61,9 @@
 #include <linux/crc32.h>
 #include <linux/nsproxy.h>
 #include <linux/virtio_net.h>
+#include <linux/vring.h>
+#include <linux/virtio_net.h>
+#include <linux/file.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 
@@ -101,6 +104,9 @@ struct tun_struct {
 	struct fasync_struct	*fasync;
 
 	struct tap_filter       txflt;
+
+	struct vring_info	*inring;
+	struct file		*infile;
 
 #ifdef TUN_DEBUG
 	int debug;
@@ -274,6 +280,10 @@ static int tun_net_xmit(struct sk_buff *
 	/* Notify and wake up reader process */
 	if (tun->flags & TUN_FASYNC)
 		kill_fasync(&tun->fasync, SIGIO, POLL_IN);
+
+	if (tun->inring)
+		vring_wake(tun->inring);
+
 	wake_up_interruptible(&tun->read_wait);
 	return 0;
 
@@ -666,6 +676,115 @@ static ssize_t tun_chr_aio_read(struct k
 	return ret;
 }
 
+#if defined(CONFIG_VRING) || defined(CONFIG_VRING_MODULE)
+/* True if skbs are queued and vring_has_buffer() says the ring has one. */
+static bool pending_recv_skbs(void *_tun)
+{
+	struct tun_struct *tun = _tun;
+
+	return !skb_queue_empty(&tun->readq) && vring_has_buffer(tun->inring);
+}
+
+/* Copy queued skbs into ring buffers.  Returns 0, or negative errno. */
+static int pull_recv_skbs(void *_tun)
+{
+	struct tun_struct *tun = _tun;
+	ssize_t err = 0, num_copied = 0;
+	struct sk_buff *skb;
+
+	while ((skb = skb_dequeue(&tun->readq)) != NULL) {
+		struct iovec iov[2+MAX_SKB_FRAGS];
+		unsigned long len;
+		int id;
+
+		id = vring_get_buffer(tun->inring, iov, ARRAY_SIZE(iov), &len,
+				      NULL, 0, NULL);
+		if (id <= 0) {
+			/* We took an skb but ring isn't ready.  Put it back */
+			skb_queue_head(&tun->readq, skb);
+			tun->dev->stats.tx_aborted_errors++;
+			err = id;
+			break;
+		}
+
+		/* Count after the requeue check so a retried skb counts once. */
+		tun->dev->stats.tx_packets++;
+		tun->dev->stats.tx_bytes += skb->len;
+		err = tun_put_user(tun, skb, iov, len);
+		kfree_skb(skb);
+		if (unlikely(err < 0)) {
+			tun->dev->stats.tx_fifo_errors++;
+			break;
+		}
+
+		err = vring_used_buffer(tun->inring, id, err);
+		if (unlikely(err)) {
+			tun->dev->stats.tx_fifo_errors++;
+			break;
+		}
+
+		num_copied++;
+	}
+
+	if (num_copied)
+		netif_wake_queue(tun->dev);
+
+	return err;
+}
+
+static struct vring_ops recvops = {
+	.can_pull = pending_recv_skbs,	/* skbs queued and ring has a buffer */
+	.pull = pull_recv_skbs,		/* copies queued skbs into the ring */
+};
+
+/* Attach the vring behind @fd as our receive ring.  0 or negative errno. */
+static int set_recv_vring(struct tun_struct *tun, int fd)
+{
+	int err = -EBADF;
+
+	if (tun->inring)
+		return -EBUSY;
+
+	tun->infile = fget(fd);
+	if (!tun->infile)
+		return -EBADF;
+
+	tun->inring = vring_get(tun->infile);
+	if (!tun->inring)
+		goto put;
+
+	err = vring_set_ops(tun->inring, &recvops, tun);
+	if (!err)
+		return 0;
+
+put:
+	tun->inring = NULL;
+	fput(tun->infile);
+	tun->infile = NULL;
+	return err;
+}
+
+/* Detach the receive ring; clear the pointers so no stale ring is reused. */
+static void unset_vrings(struct tun_struct *tun)
+{
+	if (tun->inring) {
+		vring_unset_ops(tun->inring);
+		fput(tun->infile);
+		tun->inring = NULL;
+		tun->infile = NULL;
+	}
+}
+#else /* !CONFIG_VRING && !CONFIG_VRING_MODULE */
+static int set_recv_vring(struct tun_struct *tun, int fd)
+{
+	return -ENOTTY;
+}
+
+static void unset_vrings(struct tun_struct *tun)
+{
+}
+#endif /* CONFIG_VRING || CONFIG_VRING_MODULE */
+
 static void tun_setup(struct net_device *dev)
 {
 	struct tun_struct *tun = netdev_priv(dev);
@@ -675,6 +794,7 @@ static void tun_setup(struct net_device 
 
 	tun->owner = -1;
 	tun->group = -1;
+	tun->inring = NULL;
 
 	dev->open = tun_net_open;
 	dev->hard_start_xmit = tun_net_xmit;
@@ -1001,6 +1121,9 @@ static int tun_chr_ioctl(struct inode *i
 		rtnl_unlock();
 		return ret;
 
+	case TUNSETRECVVRING:
+		return set_recv_vring(tun, arg);
+
 	case SIOCGIFHWADDR:
 		/* Get hw addres */
 		memcpy(ifr.ifr_hwaddr.sa_data, tun->dev->dev_addr, ETH_ALEN);
@@ -1071,6 +1194,8 @@ static int tun_chr_close(struct inode *i
 	DBG(KERN_INFO "%s: tun_chr_close\n", tun->dev->name);
 
 	tun_chr_fasync(-1, file, 0);
+
+	unset_vrings(tun);
 
 	rtnl_lock();
 
diff -r aae987ebf293 include/linux/if_tun.h
--- a/include/linux/if_tun.h	Mon Aug 25 10:13:25 2008 +1000
+++ b/include/linux/if_tun.h	Mon Aug 25 10:14:35 2008 +1000
@@ -46,6 +46,7 @@
 #define TUNSETOFFLOAD  _IOW('T', 208, unsigned int)
 #define TUNSETTXFILTER _IOW('T', 209, unsigned int)
 #define TUNGETIFF      _IOR('T', 210, unsigned int)
+#define TUNSETRECVVRING _IOW('T', 211, int)
 
 /* TUNSETIFF ifr flags */
 #define IFF_TUN		0x0001
