Subject: virtio: Standardize virtio's concept of "page size"
Date: Mon, 10 Nov 2008 17:37:09 -0600
From: Hollis Blanchard <hollisb@us.ibm.com>

Both sides of the virtio interface must agree on the size of the page that a
pfn (page frame number) refers to. This is particularly an issue on
architectures where the page size is configurable (e.g. PowerPC, IA64) -- the
interface must be independent of PAGE_SHIFT.

Currently the native page size leaks into the interface in three distinct
places:
* The shift count used when passing the physical address of the ring to a
  PCI-based back end.
* The layout of the ring itself, which is padded to span at least two "pages".
* The balloon driver, which operates in units of "pages".

This patch hardcodes all virtio "page sizes" to 4KB by introducing
VIRTIO_PAGE_SHIFT (12) and VIRTIO_PAGE_SIZE. It should have no functional
effect on x86, but it does change behavior on IA64.

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 Documentation/lguest/lguest.c |    6 +++---
 drivers/s390/kvm/kvm_virtio.c |    9 +++------
 drivers/virtio/virtio_pci.c   |    4 ++--
 drivers/virtio/virtio_ring.c  |    2 +-
 include/linux/virtio.h        |    5 +++++
 include/linux/virtio_ring.h   |   14 ++++++--------
 6 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -1030,7 +1030,7 @@ static void update_device_status(struct 
 		/* Zero out the virtqueues. */
 		for (vq = dev->vq; vq; vq = vq->next) {
 			memset(vq->vring.desc, 0,
-			       vring_size(vq->config.num, getpagesize()));
+			       vring_size(vq->config.num));
 			lg_last_avail(vq) = 0;
 		}
 	} else if (dev->desc->status & VIRTIO_CONFIG_S_FAILED) {
@@ -1211,7 +1211,7 @@ static void add_virtqueue(struct device 
 	void *p;
 
 	/* First we need some memory for this virtqueue. */
-	pages = (vring_size(num_descs, getpagesize()) + getpagesize() - 1)
+	pages = (vring_size(num_descs) + getpagesize() - 1)
 		/ getpagesize();
 	p = get_pages(pages);
 
@@ -1228,7 +1228,7 @@ static void add_virtqueue(struct device 
 	vq->config.pfn = to_guest_phys(p) / getpagesize();
 
 	/* Initialize the vring. */
-	vring_init(&vq->vring, num_descs, p, getpagesize());
+	vring_init(&vq->vring, num_descs, p);
 
 	/* Append virtqueue to this device's descriptor.  We use
 	 * device_config() to get the end of the device's current virtqueues;
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -187,8 +187,7 @@ static struct virtqueue *kvm_find_vq(str
 
 	config = kvm_vq_config(kdev->desc)+index;
 
-	err = vmem_add_mapping(config->address,
-			       vring_size(config->num, PAGE_SIZE));
+	err = vmem_add_mapping(config->address, vring_size(config->num));
 	if (err)
 		goto out;
 
@@ -208,8 +207,7 @@ static struct virtqueue *kvm_find_vq(str
 	vq->priv = config;
 	return vq;
 unmap:
-	vmem_remove_mapping(config->address,
-			    vring_size(config->num, PAGE_SIZE));
+	vmem_remove_mapping(config->address, vring_size(config->num));
 out:
 	return ERR_PTR(err);
 }
@@ -219,8 +217,7 @@ static void kvm_del_vq(struct virtqueue 
 	struct kvm_vqconfig *config = vq->priv;
 
 	vring_del_virtqueue(vq);
-	vmem_remove_mapping(config->address,
-			    vring_size(config->num, PAGE_SIZE));
+	vmem_remove_mapping(config->address, vring_size(config->num));
 }
 
 /*
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -237,14 +237,14 @@ static struct virtqueue *vp_find_vq(stru
 	info->queue_index = index;
 	info->num = num;
 
-	info->queue = kzalloc(PAGE_ALIGN(vring_size(num,PAGE_SIZE)), GFP_KERNEL);
+	info->queue = kzalloc(PAGE_ALIGN(vring_size(num)), GFP_KERNEL);
 	if (info->queue == NULL) {
 		err = -ENOMEM;
 		goto out_info;
 	}
 
 	/* activate the queue */
-	iowrite32(virt_to_phys(info->queue) >> PAGE_SHIFT,
+	iowrite32(virt_to_phys(info->queue) >> VIRTIO_PAGE_SHIFT,
 		  vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
 
 	/* create the vring */
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -292,7 +292,7 @@ struct virtqueue *vring_new_virtqueue(un
 	if (!vq)
 		return NULL;
 
-	vring_init(&vq->vring, num, pages, PAGE_SIZE);
+	vring_init(&vq->vring, num, pages);
 	vq->vq.callback = callback;
 	vq->vq.vdev = vdev;
 	vq->vq.vq_ops = &vring_vq_ops;
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -7,6 +7,11 @@
 #include <linux/spinlock.h>
 #include <linux/device.h>
 #include <linux/mod_devicetable.h>
+
+#define VIRTIO_PAGE_SHIFT    12
+#define VIRTIO_PAGE_SIZE     (1<<VIRTIO_PAGE_SHIFT)
+#define VIRTIO_PAGE_MASK     ~(VIRTIO_PAGE_SIZE - 1)
+#define VIRTIO_PAGE_ALIGN(x) (((x) + VIRTIO_PAGE_SIZE - 1) & VIRTIO_PAGE_MASK)
 
 /**
  * virtqueue - a queue to register buffers for sending or receiving.
diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h
@@ -92,21 +92,19 @@ struct vring {
  *	struct vring_used_elem used[num];
  * };
  */
-static inline void vring_init(struct vring *vr, unsigned int num, void *p,
-			      unsigned long pagesize)
+static inline void vring_init(struct vring *vr, unsigned int num, void *p)
 {
 	vr->num = num;
 	vr->desc = p;
 	vr->avail = p + num*sizeof(struct vring_desc);
-	vr->used = (void *)(((unsigned long)&vr->avail->ring[num] + pagesize-1)
-			    & ~(pagesize - 1));
+	vr->used = (void *)VIRTIO_PAGE_ALIGN((unsigned long)&vr->avail->ring[num]);
 }
 
-static inline unsigned vring_size(unsigned int num, unsigned long pagesize)
+static inline unsigned vring_size(unsigned int num)
 {
-	return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (2 + num)
-		 + pagesize - 1) & ~(pagesize - 1))
-		+ sizeof(__u16) * 2 + sizeof(struct vring_used_elem) * num;
+	return VIRTIO_PAGE_ALIGN(sizeof(struct vring_desc) * num +
+	    sizeof(__u16) * (2 + num)) +
+		sizeof(__u16) * 2 + sizeof(struct vring_used_elem) * num;
 }
 
 #ifdef __KERNEL__
