From fe1372ae09569efed5a62b50a06d5375f79d607b Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov
Date: Mon, 9 Sep 2019 19:28:51 +0530
Subject: [PATCH] ARM: option for loading modules into vmalloc area

Usually modules are loaded into a small area just below the kernel text
because they are linked with the kernel using short calls. Compile-time
instrumentation like GCOV or KASAN bloats code a lot, and as a result
huge modules no longer fit into the reserved area.

This patch adds the option CONFIG_MODULES_USE_VMALLOC, which lifts the
limit on the total size of loaded modules. It links modules using long
calls (-mlong-calls) and loads them into the vmalloc area.

In a few places exported symbols are called from inline assembly. This
patch adds macros for such call sites: __asmbl and __asmbl_clobber. The
call turns into either a single 'bl' or the sequence 'movw; movt; blx',
depending on the context and the state of the config option.

Unfortunately this option isn't compatible with CONFIG_FUNCTION_TRACER:
the compiler emits short calls to the profiling function despite
-mlong-calls. This is a bug in GCC, but ftrace would need an update to
handle long calls anyway.

Signed-off-by: Konstantin Khlebnikov
Patch-mainline: linux-arm-kernel @ 18/11/2014, 20:21:46
Change-Id: Iea7990a033c060c26f5782125fb63d6f96a9d218
Signed-off-by: Shubham Aggarwal
---
 arch/arm/Kconfig                | 20 ++++++++++++++++++++
 arch/arm/Makefile               |  4 ++++
 arch/arm/include/asm/compiler.h | 13 +++++++++++++
 arch/arm/include/asm/div64.h    |  2 +-
 arch/arm/include/asm/memory.h   | 11 +++++++++++
 arch/arm/include/asm/uaccess.h  | 16 ++++++++--------
 arch/arm/kernel/module.c        |  2 ++
 arch/arm/mm/dump.c              | 10 +++++++++-
 arch/arm/mm/init.c              |  2 ++
 arch/arm/mm/mmu.c               |  7 +++----
 arch/arm/mm/pgd.c               |  5 +++--
 11 files changed, 76 insertions(+), 16 deletions(-)

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 82fb2e63df95..4829af53dd7e 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1726,6 +1726,26 @@ config CPU_SW_DOMAIN_PAN
 	  Their lower 1MB needs to remain accessible for the vectors, but
 	  the remainder of userspace will become appropriately inaccessible.
 
+config MODULES_USE_LONG_CALLS
+	bool
+	help
+	  Use long calls for calling exported symbols.
+
+config MODULES_USE_VMALLOC
+	bool "Put modules into vmalloc area"
+	select MODULES_USE_LONG_CALLS
+	depends on MMU && MODULES
+	depends on !XIP_KERNEL
+	depends on !FUNCTION_TRACER
+	help
+	  Usually modules are loaded into a small area just below the kernel
+	  text because they are linked with the kernel using short calls.
+
+	  This option enables long calls and moves modules into the vmalloc
+	  area. This allows loading more modules but adds a performance penalty.
+
+	  If unsure, say n.
+
 config HW_PERF_EVENTS
 	def_bool y
 	depends on ARM_PMU
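For readers unfamiliar with the trade-off: in ARM mode 'bl' encodes a
24-bit signed word offset, so it reaches only about +/-32MB around the
call site, which is why the default module area must sit directly below
the kernel text. With -mlong-calls the compiler instead materializes the
full 32-bit address in a register and branches through it. A minimal
sketch of the two forms (the symbol names are illustrative; GCC's ARM
long_call attribute is the per-function equivalent of the -mlong-calls
flag added in the Makefile hunk below):

    /* Illustrative only: any exported symbol works the same way. */
    extern void some_export(void);
    extern void some_far_export(void) __attribute__((long_call));

    void caller(void)
    {
    	some_export();		/* bl some_export: limited to +/-32MB   */
    	some_far_export();	/* address in register + blx: full 4GB  */
    }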
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index e9b84ea6a0d6..c016458b872d 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -136,6 +136,10 @@ CFLAGS_ISA	:=$(call cc-option,-marm,)
 AFLAGS_ISA	:=$(CFLAGS_ISA)
 endif
 
+ifeq ($(CONFIG_MODULES_USE_LONG_CALLS),y)
+CFLAGS_MODULE	+= -mlong-calls
+endif
+
 # Need -Uarm for gcc < 3.x
 KBUILD_CFLAGS	+=$(CFLAGS_ABI) $(CFLAGS_ISA) $(arch-y) $(tune-y) $(call cc-option,-mshort-load-bytes,$(call cc-option,-malignment-traps,)) -msoft-float $(call cc-option, -Uarm,)
 KBUILD_AFLAGS	+=$(CFLAGS_ABI) $(AFLAGS_ISA) $(arch-y) $(tune-y) -include asm/unified.h -msoft-float
diff --git a/arch/arm/include/asm/compiler.h b/arch/arm/include/asm/compiler.h
index 5e94e67d1083..3cb1f163e634 100644
--- a/arch/arm/include/asm/compiler.h
+++ b/arch/arm/include/asm/compiler.h
@@ -25,5 +25,18 @@
 	".endif; "						\
 	".endif\n\t"
 
+/*
+ * This is used for calling exported symbols from inline assembly code.
+ */
+#if defined(MODULE) && defined(CONFIG_MODULES_USE_LONG_CALLS)
+#define __asmbl(cond, reg, target)				\
+	"movw " reg ", #:lower16:" target "\n\t"		\
+	"movt " reg ", #:upper16:" target "\n\t"		\
+	"blx" cond " " reg "\n\t"
+#define __asmbl_clobber(reg) ,reg
+#else
+#define __asmbl(cond, reg, target) "bl" cond " " target "\n\t"
+#define __asmbl_clobber(reg)
+#endif
 
 #endif /* __ASM_ARM_COMPILER_H */
diff --git a/arch/arm/include/asm/div64.h b/arch/arm/include/asm/div64.h
index 898e9c78a7e7..b2ae4a42900e 100644
--- a/arch/arm/include/asm/div64.h
+++ b/arch/arm/include/asm/div64.h
@@ -40,7 +40,7 @@ static inline uint32_t __div64_32(uint64_t *n, uint32_t base)
 		__asmeq("%1", "r2")
 		__asmeq("%2", "r0")
 		__asmeq("%3", "r4")
-		"bl	__do_div64"
+		__asmbl("", "ip", "__do_div64")
 		: "=r" (__rem), "=r" (__res)
 		: "r" (__n), "r" (__base)
 		: "ip", "lr", "cc");
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 1f54e4e98c1e..374b6b3befca 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -45,6 +45,15 @@
  */
 #define TASK_SIZE_26		(UL(1) << 26)
 
+#ifdef CONFIG_MODULES_USE_VMALLOC
+/*
+ * Modules might be anywhere in the vmalloc area.
+ */
+#define MODULES_VADDR		VMALLOC_START
+#define MODULES_END		VMALLOC_END
+
+#else /* CONFIG_MODULES_USE_VMALLOC */
+
 /*
  * The module space lives between the addresses given by TASK_SIZE
  * and PAGE_OFFSET - it must be within 32MB of the kernel text.
@@ -69,6 +78,8 @@
 #define MODULES_END		(PAGE_OFFSET)
 #endif
 
+#endif /* CONFIG_MODULES_USE_VMALLOC */
+
 /*
  * The XIP kernel gets mapped at the bottom of the module vm area.
 * Since we use sections to map it, this macro replaces the physical address
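As a usage sketch (hypothetical call site, not part of the patch): a
helper that calls an exported symbol from inline assembly would combine
both macros, so the kernel build emits a plain 'bl' while a module build
with CONFIG_MODULES_USE_LONG_CALLS emits movw/movt/blx and additionally
marks 'ip' as clobbered. As in the div64.h and uaccess.h changes, the
callee is assumed to preserve all registers other than the listed
clobbers; some_asm_helper is an invented name:

    #include <asm/compiler.h>

    /* Hypothetical exported assembly routine, for illustration only. */
    static inline void call_some_asm_helper(void)
    {
    	__asm__ __volatile__(
    		__asmbl("", "ip", "some_asm_helper")
    		: : : "lr", "cc" __asmbl_clobber("ip"));
    }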
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index a5807b67ca8a..5fadb7885b8c 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -145,21 +145,21 @@ extern int __get_user_64t_1(void *);
 extern int __get_user_64t_2(void *);
 extern int __get_user_64t_4(void *);
 
-#define __GUP_CLOBBER_1	"lr", "cc"
+#define __GUP_CLOBBER_1	"lr", "cc" __asmbl_clobber("ip")
 #ifdef CONFIG_CPU_USE_DOMAINS
 #define __GUP_CLOBBER_2	"ip", "lr", "cc"
 #else
-#define __GUP_CLOBBER_2 "lr", "cc"
+#define __GUP_CLOBBER_2 "lr", "cc" __asmbl_clobber("ip")
 #endif
-#define __GUP_CLOBBER_4	"lr", "cc"
-#define __GUP_CLOBBER_32t_8 "lr", "cc"
-#define __GUP_CLOBBER_8	"lr", "cc"
+#define __GUP_CLOBBER_4	"lr", "cc" __asmbl_clobber("ip")
+#define __GUP_CLOBBER_32t_8 "lr", "cc" __asmbl_clobber("ip")
+#define __GUP_CLOBBER_8	"lr", "cc" __asmbl_clobber("ip")
 
 #define __get_user_x(__r2, __p, __e, __l, __s)				\
 	   __asm__ __volatile__ (					\
 		__asmeq("%0", "r0") __asmeq("%1", "r2")			\
 		__asmeq("%3", "r1")					\
-		"bl	__get_user_" #__s				\
+		__asmbl("", "ip", "__get_user_" #__s)			\
 		: "=&r" (__e), "=r" (__r2)				\
 		: "0" (__p), "r" (__l)					\
 		: __GUP_CLOBBER_##__s)
@@ -181,7 +181,7 @@ extern int __get_user_64t_4(void *);
 	   __asm__ __volatile__ (					\
 		__asmeq("%0", "r0") __asmeq("%1", "r2")			\
 		__asmeq("%3", "r1")					\
-		"bl	__get_user_64t_" #__s				\
+		__asmbl("", "ip", "__get_user_64t_" #__s)		\
 		: "=&r" (__e), "=r" (__r2)				\
 		: "0" (__p), "r" (__l)					\
 		: __GUP_CLOBBER_##__s)
@@ -251,7 +251,7 @@ extern int __put_user_8(void *, unsigned long long);
 	   __asm__ __volatile__ (					\
 		__asmeq("%0", "r0") __asmeq("%2", "r2")			\
 		__asmeq("%3", "r1")					\
-		"bl	__put_user_" #__s				\
+		__asmbl("", "ip", "__put_user_" #__s)			\
 		: "=&r" (__e)						\
 		: "0" (__p), "r" (__r2), "r" (__l)			\
 		: "ip", "lr", "cc");					\
diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index 3ff571c2c71c..19b2f1fa32ff 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -38,6 +38,7 @@
 #endif
 
 #ifdef CONFIG_MMU
+#ifndef CONFIG_MODULES_USE_VMALLOC
 void *module_alloc(unsigned long size)
 {
 	gfp_t gfp_mask = GFP_KERNEL;
@@ -56,6 +57,7 @@ void *module_alloc(unsigned long size)
 			GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
 			__builtin_return_address(0));
 }
+#endif /* CONFIG_MODULES_USE_VMALLOC */
 #endif
 
 int
diff --git a/arch/arm/mm/dump.c b/arch/arm/mm/dump.c
index fc3b44028cfb..f980f35598eb 100644
--- a/arch/arm/mm/dump.c
+++ b/arch/arm/mm/dump.c
@@ -21,6 +21,7 @@
 #include
 #include
 #include
+#include
 
 struct addr_marker {
 	unsigned long start_address;
@@ -28,7 +29,12 @@ struct addr_marker {
 };
 
 static struct addr_marker address_markers[] = {
+#ifndef CONFIG_MODULES_USE_VMALLOC
 	{ MODULES_VADDR,	"Modules" },
+#endif
+#ifdef CONFIG_HIGHMEM
+	{ PKMAP_BASE,		"Page kmap" },
+#endif
 	{ PAGE_OFFSET,		"Kernel Mapping" },
 	{ 0,			"vmalloc() Area" },
 	{ VMALLOC_END,		"vmalloc() End" },
@@ -386,7 +392,9 @@ static int ptdump_init(void)
 		for (j = 0; j < pg_level[i].num; j++)
 			pg_level[i].mask |= pg_level[i].bits[j].mask;
 
-	address_markers[2].start_address = VMALLOC_START;
+	i = 1 + !IS_ENABLED(CONFIG_MODULES_USE_VMALLOC) +
+	    !!IS_ENABLED(CONFIG_HIGHMEM);
+	address_markers[i].start_address = VMALLOC_START;
 
 	pe = debugfs_create_file("kernel_page_tables", 0400, NULL, NULL,
 				 &ptdump_fops);
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 55c69be4638b..f0eb37b5ff09 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -623,7 +623,9 @@ void __init mem_init(void)
 	 * be detected at build time already.
 	 */
 #ifdef CONFIG_MMU
+#ifndef CONFIG_MODULES_USE_VMALLOC
 	BUILD_BUG_ON(TASK_SIZE				> MODULES_VADDR);
+#endif
 	BUG_ON(TASK_SIZE				> MODULES_VADDR);
 #endif
 
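With the arch-specific module_alloc() compiled out, loading falls back
to the generic __weak implementation in kernel/module.c. In kernels of
this vintage that fallback is essentially the sketch below (the exact
body varies across versions); combined with the memory.h change that
defines MODULES_VADDR/MODULES_END as VMALLOC_START/VMALLOC_END, module
code may then land anywhere in the vmalloc range:

    #include <linux/vmalloc.h>

    /* Rough shape of the generic fallback, shown for context only. */
    void * __weak module_alloc(unsigned long size)
    {
    	/* allocates executable pages between VMALLOC_START and VMALLOC_END */
    	return vmalloc_exec(size);
    }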
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 26a27fa9d0bc..563c46d20b2f 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1261,16 +1261,15 @@ void __init adjust_lowmem_bounds(void)
 
 static inline void prepare_page_table(void)
 {
-	unsigned long addr;
+	unsigned long addr = 0;
 	phys_addr_t end;
 
 	/*
 	 * Clear out all the mappings below the kernel image.
 	 */
-	for (addr = 0; addr < MODULES_VADDR; addr += PMD_SIZE)
-		pmd_clear(pmd_off_k(addr));
-
 #ifdef CONFIG_XIP_KERNEL
+	for ( ; addr < MODULES_VADDR; addr += PMD_SIZE)
+		pmd_clear(pmd_off_k(addr));
 	/* The XIP kernel is mapped in the module area -- skip over it */
 	addr = ((unsigned long)_exiprom + PMD_SIZE - 1) & PMD_MASK;
 #endif
diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c
index c1c1a5c67da1..6a1e9b44be99 100644
--- a/arch/arm/mm/pgd.c
+++ b/arch/arm/mm/pgd.c
@@ -53,17 +53,18 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 	clean_dcache_area(new_pgd, PTRS_PER_PGD * sizeof(pgd_t));
 
 #ifdef CONFIG_ARM_LPAE
+#if defined(CONFIG_HIGHMEM) || !defined(CONFIG_MODULES_USE_VMALLOC)
 	/*
 	 * Allocate PMD table for modules and pkmap mappings.
 	 */
-	new_pud = pud_alloc(mm, new_pgd + pgd_index(MODULES_VADDR),
-			    MODULES_VADDR);
+	new_pud = pud_alloc(mm, new_pgd + pgd_index(PKMAP_BASE), PKMAP_BASE);
 	if (!new_pud)
 		goto no_pud;
 
 	new_pmd = pmd_alloc(mm, new_pud, 0);
 	if (!new_pmd)
 		goto no_pmd;
+#endif
 #endif
 
 	if (!vectors_high()) {
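A worked example of why pgd_index(PKMAP_BASE) can replace
pgd_index(MODULES_VADDR) above, assuming LPAE with the usual 3GB/1GB
split (PAGE_OFFSET = 0xC0000000, PGDIR_SHIFT = 30, and PKMAP_BASE =
PAGE_OFFSET - PMD_SIZE with a 2MB PMD): both addresses fall into the
same 1GB first-level entry, so preallocating the PMD at the pkmap
address still covers highmem's kmap window, while modules moved to the
vmalloc area no longer need the preallocation at all.

    /* Standalone sanity check with illustrative values (userspace C). */
    #include <assert.h>

    #define PAGE_OFFSET		0xC0000000UL
    #define PGDIR_SHIFT		30
    #define MODULES_VADDR	(PAGE_OFFSET - (16UL << 20))	/* 0xBF000000 */
    #define PKMAP_BASE		(PAGE_OFFSET - (2UL << 20))	/* 0xBFE00000 */

    int main(void)
    {
    	/* both indices are 2, i.e. the same first-level entry */
    	assert((MODULES_VADDR >> PGDIR_SHIFT) == (PKMAP_BASE >> PGDIR_SHIFT));
    	return 0;
    }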