module: reduce locking.

Kay Sievers <kay.sievers@vrfy.org> reports that we still have some lock
contention during module loading, which is slowing boot.

Let's try making the locking more fine-grained, taking module_mutex only
where it is actually needed.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 kernel/module.c |   96 +++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 61 insertions(+), 35 deletions(-)
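
The pattern throughout is to stop holding module_mutex across whole
operations and instead take it only around the updates to shared state
(the module list, the allocation bounds, the per-cpu block table, the
dependency lists).  A rough userspace sketch of that before/after shape,
purely for illustration: the names (thing, thing_list, thing_lock,
load_thing_*) are made up, and pthreads stands in for the kernel mutex.

#include <pthread.h>
#include <stdlib.h>

struct thing {
	struct thing *next;
	int id;
};

static struct thing *thing_list;
static pthread_mutex_t thing_lock = PTHREAD_MUTEX_INITIALIZER;

/* Before: the whole load is serialized behind the lock. */
static struct thing *load_thing_coarse(int id)
{
	struct thing *t;

	pthread_mutex_lock(&thing_lock);
	t = malloc(sizeof(*t));		/* expensive part, done under the lock */
	if (t) {
		t->id = id;
		t->next = thing_list;
		thing_list = t;
	}
	pthread_mutex_unlock(&thing_lock);
	return t;
}

/* After: only the shared-list update is serialized. */
static struct thing *load_thing_fine(int id)
{
	struct thing *t = malloc(sizeof(*t));	/* expensive part, no lock held */

	if (!t)
		return NULL;
	t->id = id;

	pthread_mutex_lock(&thing_lock);
	t->next = thing_list;
	thing_list = t;
	pthread_mutex_unlock(&thing_lock);
	return t;
}

int main(void)
{
	load_thing_coarse(1);
	load_thing_fine(2);
	return 0;
}

In the diff below that is what happens to module_mutex: load_module()
now runs without it, and only the list insertion (list_add_rcu), the
allocation-bounds update, the per-cpu allocator and the dependency
bookkeeping take the lock; resolve_symbol() and free_module() grab it
internally instead of requiring the caller to hold it.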

diff --git a/kernel/module.c b/kernel/module.c
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -77,7 +77,8 @@ static DECLARE_WAIT_QUEUE_HEAD(module_wq
 
 static BLOCKING_NOTIFIER_HEAD(module_notify_list);
 
-/* Bounds of module allocation, for speeding __module_address */
+/* Bounds of module allocation, for speeding __module_address.
+ * Protected by module_mutex. */
 static unsigned long module_addr_min = -1UL, module_addr_max = 0;
 
 int register_module_notifier(struct notifier_block * nb)
@@ -318,7 +319,7 @@ static bool find_symbol_in_section(const
 }
 
 /* Find a symbol and return it, along with, (optional) crc and
- * (optional) module which owns it */
+ * (optional) module which owns it.  Needs preempt disabled or module_mutex. */
 const struct kernel_symbol *find_symbol(const char *name,
 					struct module **owner,
 					const unsigned long **crc,
@@ -428,8 +429,9 @@ static void *percpu_modalloc(unsigned lo
 {
 	unsigned long extra;
 	unsigned int i;
-	void *ptr;
+	void *ptr = NULL;
 
+	mutex_lock(&module_mutex);
 	if (align > PAGE_SIZE) {
 		printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n",
 		       name, align, PAGE_SIZE);
@@ -454,24 +456,32 @@ static void *percpu_modalloc(unsigned lo
 		ptr += extra;
 
 		/* Split block if warranted */
-		if (pcpu_size[i] - size > sizeof(unsigned long))
-			if (!split_block(i, size))
-				return NULL;
+		if (pcpu_size[i] - size > sizeof(unsigned long)) {
+			if (!split_block(i, size)) {
+				ptr = NULL;
+				break;
+			}
+		}
 
 		/* Mark allocated */
 		pcpu_size[i] = -pcpu_size[i];
-		return ptr;
+		break;
 	}
+	mutex_unlock(&module_mutex);
 
-	printk(KERN_WARNING "Could not allocate %lu bytes percpu data\n",
-	       size);
-	return NULL;
+	if (!ptr)
+		printk(KERN_WARNING
+		       "Could not allocate %lu bytes percpu data\n", size);
+	return ptr;
 }
 
 static void percpu_modfree(void *freeme)
 {
 	unsigned int i;
-	void *ptr = __per_cpu_start + block_size(pcpu_size[0]);
+	void *ptr;
+
+	mutex_lock(&module_mutex);
+	ptr = __per_cpu_start + block_size(pcpu_size[0]);
 
 	/* First entry is core kernel percpu data. */
 	for (i = 1; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) {
@@ -498,6 +508,7 @@ static void percpu_modfree(void *freeme)
 		memmove(&pcpu_size[i+1], &pcpu_size[i+2],
 			(pcpu_num_used - (i+1)) * sizeof(pcpu_size[0]));
 	}
+	mutex_unlock(&module_mutex);
 }
 
 static int percpu_modinit(void)
@@ -631,7 +642,7 @@ static int already_uses(struct module *a
 	return 0;
 }
 
-/* Module a uses b */
+/* Module a uses b: caller must hold module_mutex. */
 int use_module(struct module *a, struct module *b)
 {
 	struct module_use *use;
@@ -672,6 +683,7 @@ static void module_unload_free(struct mo
 {
 	struct module *i;
 
+	mutex_lock(&module_mutex);
 	list_for_each_entry(i, &modules, list) {
 		struct module_use *use;
 
@@ -687,6 +699,7 @@ static void module_unload_free(struct mo
 			}
 		}
 	}
+	mutex_unlock(&module_mutex);
 }
 
 #ifdef CONFIG_MODULE_FORCE_UNLOAD
@@ -849,9 +862,13 @@ SYSCALL_DEFINE2(delete_module, const cha
 	/* Store the name of the last unloaded module for diagnostic purposes */
 	strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module));
 	ddebug_remove_module(mod->name);
+
+	/* free_module doesn't want module_mutex held by caller. */
+	mutex_unlock(&module_mutex);
 	free_module(mod);
+	goto out_stop;
 
- out:
+out:
 	mutex_unlock(&module_mutex);
 out_stop:
 	stop_machine_destroy();
@@ -1052,6 +1069,8 @@ static inline int check_modstruct_versio
 {
 	const unsigned long *crc;
 
+	/* Since this should be found in kernel (which can't be removed),
+	 * no locking is necessary. */
 	if (!find_symbol("module_layout", NULL, &crc, true, false))
 		BUG();
 	return check_version(sechdrs, versindex, "module_layout", mod, crc);
@@ -1091,8 +1110,7 @@ static inline int same_magic(const char 
 }
 #endif /* CONFIG_MODVERSIONS */
 
-/* Resolve a symbol for this module.  I.e. if we find one, record usage.
-   Must be holding module_mutex. */
+/* Resolve a symbol for this module.  I.e. if we find one, record usage. */
 static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs,
 						  unsigned int versindex,
 						  const char *name,
@@ -1102,6 +1120,7 @@ static const struct kernel_symbol *resol
 	const struct kernel_symbol *sym;
 	const unsigned long *crc;
 
+	mutex_lock(&module_mutex);
 	sym = find_symbol(name, &owner, &crc,
 			  !(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)), true);
 	/* use_module can fail due to OOM,
@@ -1111,6 +1130,7 @@ static const struct kernel_symbol *resol
 		    !use_module(mod, owner))
 			sym = NULL;
 	}
+	mutex_unlock(&module_mutex);
 	return sym;
 }
 
@@ -1471,11 +1491,14 @@ static int __unlink_module(void *_mod)
 	return 0;
 }
 
-/* Free a module, remove from lists, etc (must hold module_mutex). */
+/* Free a module, remove from lists, etc. */
 static void free_module(struct module *mod)
 {
 	/* Delete from various lists */
+	mutex_lock(&module_mutex);
 	stop_machine(__unlink_module, mod, NULL);
+	mutex_unlock(&module_mutex);
+
 	remove_notes_attrs(mod);
 	remove_sect_attrs(mod);
 	mod_kobject_remove(mod);
@@ -1547,7 +1570,14 @@ static int verify_export_symbols(struct 
 
 	for (i = 0; i < ARRAY_SIZE(arr); i++) {
 		for (s = arr[i].sym; s < arr[i].sym + arr[i].num; s++) {
-			if (find_symbol(s->name, &owner, NULL, true, false)) {
+			const struct kernel_symbol *sym;
+
+			/* Disabling preemption makes find_symbol safe. */
+			preempt_disable();
+			sym = find_symbol(s->name, &owner, NULL, true, false);
+			preempt_enable();
+
+			if (sym) {
 				printk(KERN_ERR
 				       "%s: exports duplicate symbol %s"
 				       " (owned by %s)\n",
@@ -1869,11 +1899,13 @@ static void *module_alloc_update_bounds(
 	void *ret = module_alloc(size);
 
 	if (ret) {
+		mutex_lock(&module_mutex);
 		/* Update module bounds. */
 		if ((unsigned long)ret < module_addr_min)
 			module_addr_min = (unsigned long)ret;
 		if ((unsigned long)ret + size > module_addr_max)
 			module_addr_max = (unsigned long)ret + size;
+		mutex_unlock(&module_mutex);
 	}
 	return ret;
 }
@@ -2279,7 +2311,9 @@ static noinline struct module *load_modu
 	 * function to insert in a way safe to concurrent readers.
 	 * The mutex protects against concurrent writers.
 	 */
+	mutex_lock(&module_mutex);
 	list_add_rcu(&mod->list, &modules);
+	mutex_unlock(&module_mutex);
 
 	err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, NULL);
 	if (err < 0)
@@ -2298,8 +2332,10 @@ static noinline struct module *load_modu
 	return mod;
 
  unlink:
+	mutex_lock(&module_mutex);
 	/* Unlink carefully: kallsyms could be walking list. */
 	list_del_rcu(&mod->list);
+	mutex_unlock(&module_mutex);
 	synchronize_sched();
 	module_arch_cleanup(mod);
  cleanup:
@@ -2342,19 +2378,10 @@ SYSCALL_DEFINE3(init_module, void __user
 	if (!capable(CAP_SYS_MODULE))
 		return -EPERM;
 
-	/* Only one module load at a time, please */
-	if (mutex_lock_interruptible(&module_mutex) != 0)
-		return -EINTR;
-
 	/* Do all the hard work */
 	mod = load_module(umod, len, uargs);
-	if (IS_ERR(mod)) {
-		mutex_unlock(&module_mutex);
+	if (IS_ERR(mod))
 		return PTR_ERR(mod);
-	}
-
-	/* Drop lock so they can recurse */
-	mutex_unlock(&module_mutex);
 
 	blocking_notifier_call_chain(&module_notify_list,
 			MODULE_STATE_COMING, mod);
@@ -2370,9 +2397,7 @@ SYSCALL_DEFINE3(init_module, void __user
 		module_put(mod);
 		blocking_notifier_call_chain(&module_notify_list,
 					     MODULE_STATE_GOING, mod);
-		mutex_lock(&module_mutex);
 		free_module(mod);
-		mutex_unlock(&module_mutex);
 		wake_up(&module_wq);
 		return ret;
 	}
@@ -2391,14 +2416,15 @@ SYSCALL_DEFINE3(init_module, void __user
 	blocking_notifier_call_chain(&module_notify_list,
 				     MODULE_STATE_LIVE, mod);
 
-	mutex_lock(&module_mutex);
+	/* Be a little careful here: an oops may look at this mem right now. */
+	mod->init_size = 0;
+	mod->init_text_size = 0;
+	wmb();
+	module_free(mod, mod->module_init);
+	mod->module_init = NULL;
+
 	/* Drop initial reference. */
 	module_put(mod);
-	module_free(mod, mod->module_init);
-	mod->module_init = NULL;
-	mod->init_size = 0;
-	mod->init_text_size = 0;
-	mutex_unlock(&module_mutex);
 
 	return 0;
 }
