From: Siarhei Liakh <sliakh.lkml@gmail.com>
Date: Wed, 8 Jul 2009 19:10:34 -0400

RO/NX protection for loadable kernel modules

This patch is a logical extension of the protection provided by
CONFIG_DEBUG_RODATA to LKMs. The protection is provided by splitting
module_core and module_init into three logical parts each and setting
the appropriate page access permissions for each part (a short
user-space sketch of the resulting layout follows the list):

 1. Code: RO+X
 2. RO data: RO+NX
 3. RW data: RW+NX
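
For illustration, here is a minimal user-space sketch of how the
whole-page RO and NX ranges are derived from the three part sizes.
This is not kernel code: PAGE_SIZE is assumed to be 4096, the base
address and sizes are hypothetical, and the set_memory_*() calls are
only printed rather than performed.

/* Sketch only: compute whole-page RO and NX ranges for a module region. */
#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PFN_UP(x)	(((x) + PAGE_SIZE - 1) / PAGE_SIZE)
#define PFN_DOWN(x)	((x) / PAGE_SIZE)

static void sketch_ro_nx(unsigned long base, unsigned long text_size,
			 unsigned long ro_size, unsigned long total_size)
{
	unsigned long begin, end;

	/* RO covers text + ro-data, rounded inward to whole pages */
	begin = PFN_UP(base);
	end = PFN_DOWN(base + ro_size);
	if (end > begin)
		printf("set_memory_ro(0x%lx, %lu pages)\n",
		       begin * PAGE_SIZE, end - begin);

	/* NX covers everything after the text, rounded inward as well */
	begin = PFN_UP(base + text_size);
	end = PFN_DOWN(base + total_size);
	if (end > begin)
		printf("set_memory_nx(0x%lx, %lu pages)\n",
		       begin * PAGE_SIZE, end - begin);
}

int main(void)
{
	/* hypothetical page-aligned core: 2 pages text, 1 page ro-data,
	 * 1 page rw data */
	sketch_ro_nx(0x100000UL, 2 * PAGE_SIZE, 3 * PAGE_SIZE, 4 * PAGE_SIZE);
	return 0;
}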

In order to achieve proper protection, layout_sections() has been
modified to align each of the three parts mentioned above on a page
boundary. Next, the corresponding page access permissions are set
right before the successful exit from load_module(). Further, free_module()
and sys_init_module() have been modified to set module_core and
module_init back to RW+NX right before calling module_free().
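
The restore path before module_free() intentionally covers partial
pages as well: the page count is taken from the first and last page
frames the region touches. A small user-space sketch of that
calculation (values hypothetical, PAGE_SIZE assumed to be 4096):

/* Sketch only: count every page a region touches. */
#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PFN_DOWN(x)	((x) / PAGE_SIZE)

static unsigned long pages_touched(unsigned long base, unsigned long size)
{
	if (size == 0)
		return 0;
	/* last touched PFN - first PFN + 1, so partial pages count too */
	return PFN_DOWN(base + size - 1) - PFN_DOWN(base) + 1;
}

int main(void)
{
	/* starts and ends mid-page, yet spans two pages */
	printf("%lu\n", pages_touched(0x100800UL, 0x1400UL));	/* prints 2 */
	return 0;
}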

By default, the original section layout is preserved and RO/NX is
enforced only for whole pages of the same content type.
However, when compiled with CONFIG_DEBUG_RODATA=y, the patch
page-aligns each group of sections to ensure that each page contains
only one type of content.
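
The alignment itself is just a conditional round-up of the running
layout size after each group of sections. A user-space sketch of both
behaviours, using hypothetical section sizes:

/* Sketch only: effect of page-aligning each group of sections. */
#include <stdio.h>

#define PAGE_SIZE	4096UL
#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned long text = 0x1234, rodata = 0x800, rwdata = 0x2000;
	unsigned long size;
	int aligned;	/* stands in for CONFIG_DEBUG_RODATA */

	for (aligned = 0; aligned <= 1; aligned++) {
		size = text;
		if (aligned)
			size = ALIGN(size, PAGE_SIZE);
		printf("text ends at    0x%lx\n", size);

		size += rodata;
		if (aligned)
			size = ALIGN(size, PAGE_SIZE);
		printf("ro-data ends at 0x%lx\n", size);

		size += rwdata;
		if (aligned)
			size = ALIGN(size, PAGE_SIZE);
		printf("rw data ends at 0x%lx (aligned=%d)\n\n", size, aligned);
	}
	return 0;
}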

v1: Initial proof-of-concept patch.

v2: The patch has been rewritten to reduce the number of #ifdefs and
to make it architecture-agnostic. Code formatting has also been corrected.

v3: Opportunistic RO/NX protection is now unconditional. Section
page-alignment is enabled when CONFIG_DEBUG_RODATA=y.

v4: Removed most macros and improved coding style.

v5: Changed page-alignment and RO/NX section size calculation.

The patch has been developed for Linux 2.6.30 by Siarhei Liakh
<sliakh.lkml@gmail.com> and Xuxian Jiang <jiang@cs.ncsu.edu>.

Signed-off-by: Siarhei Liakh <sliakh.lkml@gmail.com>
Signed-off-by: Xuxian Jiang <jiang@cs.ncsu.edu>
Acked-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (fixes)
---
 include/linux/module.h |    3 +
 kernel/module.c        |  132 ++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 133 insertions(+), 2 deletions(-)

diff --git a/include/linux/module.h b/include/linux/module.h
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -296,6 +296,9 @@ struct module
 	/* The size of the executable code in each section.  */
 	unsigned int init_text_size, core_text_size;
 
+	/* Size of RO sections of the module (text+rodata) */
+	unsigned int init_ro_size, core_ro_size;
+
 	/* Arch-specific module values */
 	struct mod_arch_specific arch;
 
diff --git a/kernel/module.c b/kernel/module.c
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -54,6 +54,7 @@
 #include <linux/async.h>
 #include <linux/percpu.h>
 #include <linux/kmemleak.h>
+#include <linux/pfn.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/module.h>
@@ -70,6 +71,22 @@ EXPORT_TRACEPOINT_SYMBOL(module_get);
 #define ARCH_SHF_SMALL 0
 #endif
 
+/* Modules' sections will be aligned on page boundaries
+ * to ensure complete separation of code and data, but
+ * only when CONFIG_DEBUG_RODATA=y */
+#ifdef CONFIG_DEBUG_RODATA
+#define debug_align(X) ALIGN(X, PAGE_SIZE)
+#else
+#define debug_align(X) (X)
+#endif
+
+/* Given BASE and SIZE this macro calculates the number of pages the
+ * memory region occupies. */
+#define NUMBER_OF_PAGES(BASE, SIZE) ((SIZE > 0) ?		\
+		(PFN_DOWN((unsigned long)BASE + SIZE - 1) -	\
+			 PFN_DOWN((unsigned long)BASE) + 1)	\
+		: (0UL))
+
 /* If this is set, the section belongs in the init part of the module */
 #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1))
 
@@ -1501,6 +1518,78 @@ static int __unlink_module(void *_mod)
 	return 0;
 }
 
+/* LKM RO/NX protection: protect module's text/ro-data
+ * from modification and any data from execution.
+ * Siarhei Liakh, Xuxian Jiang  */
+static void set_section_ro_nx(void *base,
+			unsigned long text_size,
+			unsigned long ro_size,
+			unsigned long total_size)
+{
+	/* begin and end PFNs of the current subsection */
+	unsigned long begin_pfn;
+	unsigned long end_pfn;
+
+#ifdef CONFIG_DEBUG_RODATA
+	/* Most module_alloc() implementations use vmalloc(), which
+	 * page-aligns; if not, we could miss protection on the first part. */
+	WARN_ON(offset_in_page((unsigned long)base));
+#endif
+
+	/* Initially, all module sections have RWX permissions */
+	DEBUGP("PROTECTING MODULE SECTION: 0x%lx\n"
+			"  text size: %lu\n"
+			"  ro size: %lu\n"
+			"  total size: %lu\n",
+			(unsigned long)base,
+	       text_size, ro_size, total_size);
+
+	/* Set RO for module text and RO-data */
+	/* Don't protect partial pages. */
+	begin_pfn = PFN_UP((unsigned long)base);
+	end_pfn = PFN_DOWN((unsigned long)base + ro_size);
+
+	/* Set text/ro-data RO if whole pages remain between begin and end */
+	if (end_pfn > begin_pfn) {
+		DEBUGP("  RO: 0x%lx %lu\n",
+		       begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn);
+		set_memory_ro(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn);
+	}
+
+	/* Set NX permissions for module data */
+	/* Don't protect partial pages. */
+	begin_pfn = PFN_UP((unsigned long)base + text_size);
+	end_pfn = PFN_DOWN((unsigned long)base + total_size);
+
+	if (end_pfn > begin_pfn) {
+		DEBUGP("  NX: 0x%lx %lu\n",
+		       begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn);
+		set_memory_nx(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn);
+	}
+}
+
+/* Set memory back to RW+NX before releasing it */
+static void unset_section_ro_nx(struct module *mod, void *module_region)
+{
+	unsigned long total_pages;
+
+	if (mod->module_core == module_region) {
+		/* Set core as NX+RW */
+		total_pages = NUMBER_OF_PAGES(mod->module_core, mod->core_size);
+		DEBUGP("Restoring RW+NX for module's CORE: 0x%lx %lu\n",
+				(unsigned long)mod->module_core, total_pages);
+		set_memory_nx((unsigned long)mod->module_core, total_pages);
+		set_memory_rw((unsigned long)mod->module_core, total_pages);
+	} else if (mod->module_init == module_region) {
+		/* Set init as NX+RW */
+		total_pages = NUMBER_OF_PAGES(mod->module_init, mod->init_size);
+		DEBUGP("Restoring RW+NX for module's INIT: 0x%lx %lu\n",
+				(unsigned long)mod->module_init, total_pages);
+		set_memory_nx((unsigned long)mod->module_init, total_pages);
+		set_memory_rw((unsigned long)mod->module_init, total_pages);
+	}
+}
+
 /* Free a module, remove from lists, etc (must hold module_mutex). */
 static void free_module(struct module *mod)
 {
@@ -1522,6 +1611,7 @@ static void free_module(struct module *m
 	destroy_params(mod->kp, mod->num_kp);
 
 	/* This may be NULL, but that's OK */
+	unset_section_ro_nx(mod, mod->module_init);
 	module_free(mod, mod->module_init);
 	kfree(mod->args);
 	if (mod->percpu)
@@ -1534,6 +1624,7 @@ static void free_module(struct module *m
 	lockdep_free_key_range(mod->module_core, mod->core_size);
 
 	/* Finally, free the core (containing the module structure) */
+	unset_section_ro_nx(mod, mod->module_core);
 	module_free(mod, mod->module_core);
 }
 
@@ -1707,8 +1798,20 @@ static void layout_sections(struct modul
 			s->sh_entsize = get_offset(mod, &mod->core_size, s, i);
 			DEBUGP("\t%s\n", secstrings + s->sh_name);
 		}
-		if (m == 0)
+		switch (m) {
+		case 0: /* executable */
+			mod->core_size = debug_align(mod->core_size);
 			mod->core_text_size = mod->core_size;
+			break;
+		case 1: /* RO: text and ro-data */
+			mod->core_size = debug_align(mod->core_size);
+			mod->core_ro_size = mod->core_size;
+			break;
+		case 3: /* whole module core (executable + RO data +
+			 * RW data + small alloc) */
+			mod->core_size = debug_align(mod->core_size);
+			break;
+		}
 	}
 
 	DEBUGP("Init section allocation order:\n");
@@ -1725,8 +1828,20 @@ static void layout_sections(struct modul
 					 | INIT_OFFSET_MASK);
 			DEBUGP("\t%s\n", secstrings + s->sh_name);
 		}
-		if (m == 0)
+		switch (m) {
+		case 0: /* executable */
+			mod->init_size = debug_align(mod->init_size);
 			mod->init_text_size = mod->init_size;
+			break;
+		case 1: /* RO: text and ro-data */
+			mod->init_size = debug_align(mod->init_size);
+			mod->init_ro_size = mod->init_size;
+			break;
+		case 3: /* whole module init (executable + RO data +
+			 * RW data + small alloc) */
+			mod->init_size = debug_align(mod->init_size);
+			break;
+		}
 	}
 }
 
@@ -2496,6 +2611,18 @@ static noinline struct module *load_modu
 
 	trace_module_load(mod);
 
+	/* Set RO and NX regions for core */
+	set_section_ro_nx(mod->module_core,
+			  mod->core_text_size,
+			  mod->core_ro_size,
+			  mod->core_size);
+
+	/* Set RO and NX regions for init */
+	set_section_ro_nx(mod->module_init,
+			  mod->init_text_size,
+			  mod->init_ro_size,
+			  mod->init_size);
+
 	/* Done! */
 	return mod;
 
@@ -2611,6 +2738,7 @@ SYSCALL_DEFINE3(init_module, void __user
 	mutex_lock(&module_mutex);
 	/* Drop initial reference. */
 	module_put(mod);
+	unset_section_ro_nx(mod, mod->module_init);
 	trim_init_extable(mod);
 #ifdef CONFIG_KALLSYMS
 	mod->num_symtab = mod->core_num_syms;
