Subject: lguest: guest PSE support
Date: Tue, 19 Aug 2008 02:16:39 -0300
From: "Matias Zabaljauregui" <zabaljauregui@gmail.com>

Allow the guest to use PSE and back the large pages with 4KB shadow pages

Signed-off-by: Matias Zabaljauregui <zabaljauregui@gmail.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 arch/x86/lguest/boot.c       |    3 +
 drivers/lguest/page_tables.c |   76 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 77 insertions(+), 2 deletions(-)

diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -341,7 +341,8 @@ static void lguest_cpuid(unsigned int *a
 		 * PAGE_OFFSET is set to) haven't changed.  But Linux calls
 		 * flush_tlb_user() for both user and kernel mappings unless
 		 * the Page Global Enable (PGE) feature bit is set. */
-		*dx |= 0x00002000;
+		/* Let's also enable PSE support */
+		*dx |= 0x00002008;
 		break;
 	case 0x80000000:
 		/* Futureproof this a little: if they ask how much extended
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -203,14 +203,84 @@ int demand_page(struct lg_cpu *cpu, unsi
 	pgd_t gpgd;
 	pgd_t *spgd;
 	unsigned long gpte_ptr;
+	unsigned long gpgd_ptr;
 	pte_t gpte;
 	pte_t *spte;
+	int i = 0;
+	pte_t fake_gpte;
+	pte_t *ptep;
+	unsigned long frame;
+	unsigned long ptepage;
 
 	/* First step: get the top-level Guest page table entry. */
-	gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
+	gpgd_ptr = gpgd_addr(cpu, vaddr);
+	gpgd = lgread(cpu, gpgd_ptr, pgd_t);
 	/* Toplevel not present?  We can't map it in. */
 	if (!(pgd_flags(gpgd) & _PAGE_PRESENT))
 		return 0;
+
+	/* If the gpgd is actually pointing to a 4MB page,
+	 * instead of pointing to a pte page, we will back it
+	 * with 4KB pages in the host */
+	if (pgd_flags(gpgd) & _PAGE_PSE) {
+		/* Check they're not trying to write to a page the Guest wants
+		 * read-only (bit 2 of errcode == write). */
+		if ((errcode & 2) && !(pgd_flags(gpgd) & _PAGE_RW))
+			return 0;
+
+		/* User access to a kernel-only page? (bit 3 == user access) */
+		if ((errcode & 4) && !(pgd_flags(gpgd) & _PAGE_USER))
+			return 0;
+
+		/* Is the 4MB page within the guest limits? */
+		if (pgd_pfn(gpgd) + ((PGDIR_SIZE - PAGE_SIZE) >> PAGE_SHIFT) >=
+		    cpu->lg->pfn_limit)
+			kill_guest(cpu, "large page out of limits");
+
+		/* Now look at the matching shadow entry. */
+		spgd = spgd_addr(cpu, cpu->cpu_pgd, vaddr);
+
+		/* No shadow entry: allocate a new shadow PTE page. */
+		if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) {
+			ptepage = get_zeroed_page(GFP_KERNEL);
+			if (!ptepage)
+				kill_guest(cpu,
+					   "out of memory allocating pte page");
+			/* We build a shadow pgd, pointing to the PTE page */
+			set_pgd(spgd, __pgd(__pa(ptepage) |
+			      (pgd_flags(gpgd) & ~_PAGE_PSE &  _PAGE_TABLE)));
+		}
+
+		/* Add the _PAGE_ACCESSED and (for a write) _PAGE_DIRTY flag */
+		gpgd = __pgd(pgd_val(gpgd) | _PAGE_ACCESSED);
+		if (errcode & 2)
+			gpgd = __pgd(pgd_val(gpgd) | _PAGE_DIRTY);
+
+		/* We will use this pointer to populate the shadow PTE page */
+		ptep = __va(pgd_pfn(*spgd) << PAGE_SHIFT);
+
+		/* We will create a fake gpte, so we can use gpte_to_spte function */
+		frame = pgd_pfn(gpgd) << PAGE_SHIFT;
+
+		/* And here, we completely populate the shadow PTE page,
+		 * so we map the 1024 4KB pages, backing the 4MB guest page */
+		for (; i < PTRS_PER_PGD; i++) {
+			fake_gpte =
+			    __pte(frame | (pgd_flags(gpgd) & ~_PAGE_PSE));
+			frame = frame + PAGE_SIZE;
+			release_pte(ptep[i]);
+			if (pgd_val(gpgd) & _PAGE_DIRTY)
+				ptep[i] = gpte_to_spte(cpu, fake_gpte, 1);
+			else
+				ptep[i] =
+				    gpte_to_spte(cpu, pte_wrprotect(fake_gpte),
+						 0);
+		}
+		/* Finally, we write the Guest PGD entry back: we've set the
+		 * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags. */
+		lgwrite(cpu, gpgd_ptr, pgd_t, gpgd);
+		return 1;
+	} /* (pgd_flags(gpgd) & _PAGE_PSE) */
 
 	/* Now look at the matching shadow entry. */
 	spgd = spgd_addr(cpu, cpu->cpu_pgd, vaddr);
@@ -374,6 +444,10 @@ unsigned long guest_pa(struct lg_cpu *cp
 	/* Toplevel not present?  We can't map it in. */
 	if (!(pgd_flags(gpgd) & _PAGE_PRESENT))
 		kill_guest(cpu, "Bad address %#lx", vaddr);
+
+	/* Is it a large page? We don't need any gpte to return the address */
+	if (pgd_flags(gpgd) & _PAGE_PSE)
+		return (pgd_val(gpgd) & PGDIR_MASK) | (vaddr & ~PGDIR_MASK);
 
 	gpte = lgread(cpu, gpte_addr(gpgd, vaddr), pte_t);
 	if (!(pte_flags(gpte) & _PAGE_PRESENT))
