virtio: put last_used and last_avail index into ring itself.

Generally, the other end of the virtio ring doesn't need to see where
you're up to in consuming the ring.  However, to completely understand
what's going on from the outside, this information must be exposed.
For example, you may want to save and restore a virtio_ring even though
you're not the consumer, because the kernel is using it directly.

Fortunately, we have room to expand: the ring is always a whole number
of pages and there are hundreds of bytes of padding after the avail ring
and the used ring, whatever the number of descriptors (which must be a
power of 2).

We add a feature bit so the guest can tell the host that it's writing
out the current value there, if it wants to use that.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/lguest/lguest_device.c |    3 ++-
 drivers/virtio/virtio_pci.c    |    6 +++++-
 drivers/virtio/virtio_ring.c   |   28 ++++++++++++++++++++--------
 include/linux/virtio_ring.h    |   15 ++++++++++++++-
 4 files changed, 41 insertions(+), 11 deletions(-)

diff -r 85737cdd1f42 drivers/lguest/lguest_device.c
--- a/drivers/lguest/lguest_device.c	Tue Apr 22 13:48:58 2008 +1000
+++ b/drivers/lguest/lguest_device.c	Tue Apr 22 13:54:37 2008 +1000
@@ -98,7 +98,8 @@ static u32 lg_get_features(struct virtio
 		if (in_features[i / 8] & (1 << (i % 8)))
 			features |= (1 << i);
 
-	return features;
+	/* Vring may want to play with the bits it's offered. */
+	return vring_transport_features(features);
 }
 
 static void lg_set_features(struct virtio_device *vdev, u32 features)
diff -r 85737cdd1f42 drivers/virtio/virtio_pci.c
--- a/drivers/virtio/virtio_pci.c	Tue Apr 22 13:48:58 2008 +1000
+++ b/drivers/virtio/virtio_pci.c	Tue Apr 22 13:54:37 2008 +1000
@@ -91,10 +91,14 @@ static u32 vp_get_features(struct virtio
 static u32 vp_get_features(struct virtio_device *vdev)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	u32 features;
 
 	/* When someone needs more than 32 feature bits, we'll need to
 	 * steal a bit to indicate that the rest are somewhere else. */
-	return ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES);
+	features = ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES);
+
+	/* Vring may want to play with the bits it's offered. */
+	return vring_transport_features(features);
 }
 
 /* virtio config->set_features() implementation */
diff -r 85737cdd1f42 drivers/virtio/virtio_ring.c
--- a/drivers/virtio/virtio_ring.c	Tue Apr 22 13:48:58 2008 +1000
+++ b/drivers/virtio/virtio_ring.c	Tue Apr 22 13:54:37 2008 +1000
@@ -18,6 +18,7 @@
  */
 #include <linux/virtio.h>
 #include <linux/virtio_ring.h>
+#include <linux/virtio_config.h>
 #include <linux/device.h>
 
 #ifdef DEBUG
@@ -51,9 +52,6 @@ struct vring_virtqueue
 	unsigned int free_head;
 	/* Number we've added since last sync. */
 	unsigned int num_added;
-
-	/* Last used index we've seen. */
-	u16 last_used_idx;
 
 	/* How to notify other side. FIXME: commonalize hcalls! */
 	void (*notify)(struct virtqueue *vq);
@@ -173,12 +171,13 @@ static void detach_buf(struct vring_virt
 
 static inline bool more_used(const struct vring_virtqueue *vq)
 {
-	return vq->last_used_idx != vq->vring.used->idx;
+	return vring_last_used(&vq->vring) != vq->vring.used->idx;
 }
 
 static void *vring_get_buf(struct virtqueue *_vq, unsigned int *len)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
+	struct vring_used_elem *u;
 	void *ret;
 	unsigned int i;
 
@@ -195,8 +194,11 @@ static void *vring_get_buf(struct virtqu
 		return NULL;
 	}
 
-	i = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].id;
-	*len = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].len;
+	u = &vq->vring.used->ring[vring_last_used(&vq->vring) % vq->vring.num];
+	i = u->id;
+	*len = u->len;
+	/* Make sure we don't reload i after doing checks. */
+	rmb();
 
 	if (unlikely(i >= vq->vring.num)) {
 		BAD_RING(vq, "id %u out of range\n", i);
@@ -210,7 +212,7 @@ static void *vring_get_buf(struct virtqu
 	/* detach_buf clears data, so grab it now. */
 	ret = vq->data[i];
 	detach_buf(vq, i);
-	vq->last_used_idx++;
+	vring_last_used(&vq->vring)++;
 	END_USE(vq);
 	return ret;
 }
@@ -302,7 +304,6 @@ struct virtqueue *vring_new_virtqueue(un
 	vq->vq.vq_ops = &vring_vq_ops;
 	vq->notify = notify;
 	vq->broken = false;
-	vq->last_used_idx = 0;
 	vq->num_added = 0;
 #ifdef DEBUG
 	vq->in_use = false;
@@ -328,4 +329,15 @@ void vring_del_virtqueue(struct virtqueu
 }
 EXPORT_SYMBOL_GPL(vring_del_virtqueue);
 
+/* Filters feature bits: keeps non-transport bits plus those vring knows. */
+u32 vring_transport_features(u32 features)
+{
+	u32 mask = ~VIRTIO_TRANSPORT_F_MASK;
+
+	/* We let through any non-transport bits, and the only one we know. */
+	mask |= (1 << VIRTIO_RING_F_PUBLISH_INDICES);
+	return features & mask;
+}
+EXPORT_SYMBOL_GPL(vring_transport_features);
+
 MODULE_LICENSE("GPL");
diff -r 85737cdd1f42 include/linux/virtio_ring.h
--- a/include/linux/virtio_ring.h	Tue Apr 22 13:48:58 2008 +1000
+++ b/include/linux/virtio_ring.h	Tue Apr 22 13:54:37 2008 +1000
@@ -23,6 +23,9 @@
  * when you consume a buffer.  It's unreliable, so it's simply an
  * optimization.  */
 #define VRING_AVAIL_F_NO_INTERRUPT	1
+
+/* We publish our last-seen used index at the end of the avail ring. */
+#define VIRTIO_RING_F_PUBLISH_INDICES	24
 
 /* Virtio ring descriptors: 16 bytes.  These can chain together via "next". */
 struct vring_desc
@@ -82,6 +85,7 @@ struct vring {
  *	__u16 avail_flags;
  *	__u16 avail_idx;
  *	__u16 available[num];
+ *	__u16 last_used_idx;
  *
  *	// Padding to the next page boundary.
  *	char pad[];
@@ -90,6 +94,7 @@ struct vring {
  *	__u16 used_flags;
  *	__u16 used_idx;
  *	struct vring_used_elem used[num];
+ *	__u16 last_avail_idx;
  * };
  */
 static inline void vring_init(struct vring *vr, unsigned int num, void *p,
@@ -106,8 +111,13 @@ static inline unsigned vring_size(unsign
 {
 	return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (2 + num)
 		 + pagesize - 1) & ~(pagesize - 1))
-		+ sizeof(__u16) * 2 + sizeof(struct vring_used_elem) * num;
+		+ sizeof(__u16) * 2 + sizeof(struct vring_used_elem) * num + 2;
 }
+
+/* We publish the last-seen used index at the end of the available ring, and
+ * vice-versa.  These are at the end for backwards compatibility. */
+#define vring_last_used(vr) ((vr)->avail->ring[(vr)->num])
+#define vring_last_avail(vr) (*(__u16 *)&(vr)->used->ring[(vr)->num])
 
 #ifdef __KERNEL__
 #include <linux/irqreturn.h>
@@ -121,6 +131,9 @@ struct virtqueue *vring_new_virtqueue(un
 				      void (*callback)(struct virtqueue *vq));
 void vring_del_virtqueue(struct virtqueue *vq);
 
+/* Filter out unsupported transport-specific feature bits. */
+u32 vring_transport_features(u32 features);
+
 irqreturn_t vring_interrupt(int irq, void *_vq);
 #endif /* __KERNEL__ */
 #endif /* _LINUX_VIRTIO_RING_H */
