From af1f94d0afc1d5da04ad6327235f78b3a98f0736 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Tue, 28 Nov 2017 08:48:49 -0800 Subject: [PATCH] FROMLIST: kbuild: add support for clang LTO This change adds the configuration option CONFIG_LTO_CLANG, and build system support for clang's Link Time Optimization (LTO). In preparation for LTO support for other compilers, potentially common parts of the changes are gated behind CONFIG_LTO instead. With -flto, instead of object files, clang produces LLVM bitcode, which is compiled into a native object at link time, allowing the final binary to be optimized globally. For more details, see: https://llvm.org/docs/LinkTimeOptimization.html While the kernel normally uses GNU ld for linking, LLVM supports LTO only with lld or GNU gold linkers. This patch set assumes gold will be used with the LLVMgold plug-in to perform the LTO link step. Due to potential incompatibilities with GNU ld, this change also adds LDFINAL_vmlinux for using a different linker for the vmlinux_link step, and defaults to using GNU ld. Assuming LLVMgold.so is in LD_LIBRARY_PATH and CONFIG_LTO_CLANG has been selected, an LTO kernel can be built simply by running make CC=clang. LTO requires clang >= 5.0 and gold from binutils >= 2.27. Bug: 62093296 Bug: 67506682 Change-Id: Ibcd9fc7ec501b4f30b43b4877897615645f8655f (am from https://patchwork.kernel.org/patch/10060329/) Signed-off-by: Sami Tolvanen --- Makefile | 54 ++++++++++++++++++++++- arch/Kconfig | 40 +++++++++++++++++ scripts/Makefile.build | 92 ++++++++++++++++++++++++++++++++++++---- scripts/Makefile.modpost | 40 ++++++++++++++--- scripts/link-vmlinux.sh | 68 ++++++++++++++++++++++++++--- 5 files changed, 270 insertions(+), 24 deletions(-) diff --git a/Makefile b/Makefile index 3c976a69ca2b..67e96818a79a 100644 --- a/Makefile +++ b/Makefile @@ -377,6 +377,7 @@ endif # Make variables (CC, etc...) AS = $(CROSS_COMPILE)as LD = $(CROSS_COMPILE)ld +LDGOLD = $(CROSS_COMPILE)ld.gold CC = $(CROSS_COMPILE)gcc CPP = $(CC) -E AR = $(CROSS_COMPILE)ar @@ -636,6 +637,20 @@ CFLAGS_GCOV := -fprofile-arcs -ftest-coverage -fno-tree-loop-im $(call cc-disabl CFLAGS_KCOV := $(call cc-option,-fsanitize-coverage=trace-pc,) export CFLAGS_GCOV CFLAGS_KCOV +# Make toolchain changes before including arch/$(SRCARCH)/Makefile to ensure +# ar/cc/ld-* macros return correct values. +ifdef CONFIG_LTO_CLANG +# use GNU gold with LLVMgold for LTO linking, and LD for vmlinux_link +LDFINAL_vmlinux := $(LD) +LD := $(LDGOLD) +LDFLAGS += -plugin LLVMgold.so +# use llvm-ar for building symbol tables from IR files, and llvm-dis instead +# of objdump for processing symbol versions and exports +LLVM_AR := llvm-ar +LLVM_DIS := llvm-dis +export LLVM_AR LLVM_DIS +endif + # The arch Makefile can set ARCH_{CPP,A,C}FLAGS to override the default # values of the respective KBUILD_* variables ARCH_CPPFLAGS := @@ -794,6 +809,26 @@ KBUILD_CFLAGS += $(call cc-option,-ffunction-sections,) KBUILD_CFLAGS += $(call cc-option,-fdata-sections,) endif +ifdef CONFIG_LTO_CLANG +lto-clang-flags := -flto -fvisibility=hidden + +# allow disabling only clang LTO where needed +DISABLE_LTO_CLANG := -fno-lto -fvisibility=default +export DISABLE_LTO_CLANG +endif + +ifdef CONFIG_LTO +lto-flags := $(lto-clang-flags) +KBUILD_CFLAGS += $(lto-flags) + +DISABLE_LTO := $(DISABLE_LTO_CLANG) +export DISABLE_LTO + +# LDFINAL_vmlinux and LDFLAGS_FINAL_vmlinux can be set to override +# the linker and flags for vmlinux_link. +export LDFINAL_vmlinux LDFLAGS_FINAL_vmlinux +endif + # arch Makefile may override CC so keep this after arch Makefile is included NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include) CHECKFLAGS += $(NOSTDINC_FLAGS) @@ -1111,6 +1146,22 @@ prepare-objtool: $(objtool_target) # CC_STACKPROTECTOR_STRONG! Why did it build with _REGULAR?!") PHONY += prepare-compiler-check prepare-compiler-check: FORCE +# Make sure we're using a supported toolchain with LTO_CLANG +ifdef CONFIG_LTO_CLANG + ifneq ($(call clang-ifversion, -ge, 0500, y), y) + @echo Cannot use CONFIG_LTO_CLANG: requires clang 5.0 or later >&2 && exit 1 + endif + ifneq ($(call gold-ifversion, -ge, 112000000, y), y) + @echo Cannot use CONFIG_LTO_CLANG: requires GNU gold 1.12 or later >&2 && exit 1 + endif +endif +# Make sure compiler supports LTO flags +ifdef lto-flags + ifeq ($(call cc-option, $(lto-flags)),) + @echo Cannot use CONFIG_LTO: $(lto-flags) not supported by compiler \ + >&2 && exit 1 + endif +endif # Make sure compiler supports requested stack protector flag. ifdef stackp-name ifeq ($(call cc-option, $(stackp-flag)),) @@ -1584,7 +1635,8 @@ clean: $(clean-dirs) -o -name modules.builtin -o -name '.tmp_*.o.*' \ -o -name '*.c.[012]*.*' \ -o -name '*.ll' \ - -o -name '*.gcno' \) -type f -print | xargs rm -f + -o -name '*.gcno' \ + -o -name '*.*.symversions' \) -type f -print | xargs rm -f # Generate tags for editors # --------------------------------------------------------------------------- diff --git a/arch/Kconfig b/arch/Kconfig index 400b9e1b2f27..e59db803807f 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -605,6 +605,46 @@ config LD_DEAD_CODE_DATA_ELIMINATION sections (e.g., '.text.init'). Typically '.' in section names is used to distinguish them from label names / C identifiers. +config LTO + def_bool n + +config ARCH_SUPPORTS_LTO_CLANG + bool + help + An architecture should select this option it supports: + - compiling with clang, + - compiling inline assembly with clang's integrated assembler, + - and linking with either lld or GNU gold w/ LLVMgold. + +choice + prompt "Link-Time Optimization (LTO) (EXPERIMENTAL)" + default LTO_NONE + help + This option turns on Link-Time Optimization (LTO). + +config LTO_NONE + bool "None" + +config LTO_CLANG + bool "Use clang Link Time Optimization (LTO) (EXPERIMENTAL)" + depends on ARCH_SUPPORTS_LTO_CLANG + depends on !FTRACE_MCOUNT_RECORD + select LTO + select THIN_ARCHIVES + select LD_DEAD_CODE_DATA_ELIMINATION + help + This option enables clang's Link Time Optimization (LTO), which allows + the compiler to optimize the kernel globally at link time. If you + enable this option, the compiler generates LLVM IR instead of object + files, and the actual compilation from IR occurs at the LTO link step, + which may take several minutes. + + If you select this option, you must compile the kernel with clang >= + 5.0 (make CC=clang) and GNU gold from binutils >= 2.27, and have the + LLVMgold plug-in in LD_LIBRARY_PATH. + +endchoice + config HAVE_ARCH_WITHIN_STACK_FRAMES bool help diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 7143da06d702..bda756f438f1 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -210,6 +210,23 @@ else cmd_cc_o_c = $(CC) $(c_flags) -c -o $(@D)/.tmp_$(@F) $< +ifdef CONFIG_LTO_CLANG +# Generate .o.symversions files for each .o with exported symbols, and link these +# to the kernel and/or modules at the end. +cmd_modversions_c = \ + if $(OBJDUMP) -h $(@D)/.tmp_$(@F) >/dev/null 2>/dev/null; then \ + if $(OBJDUMP) -h $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \ + $(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \ + > $(@D)/$(@F).symversions; \ + fi; \ + else \ + if $(LLVM_DIS) -o=- $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \ + $(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \ + > $(@D)/$(@F).symversions; \ + fi; \ + fi; \ + mv -f $(@D)/.tmp_$(@F) $@; +else cmd_modversions_c = \ if $(OBJDUMP) -h $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \ $(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \ @@ -222,6 +239,7 @@ cmd_modversions_c = \ mv -f $(@D)/.tmp_$(@F) $@; \ fi; endif +endif ifdef CONFIG_FTRACE_MCOUNT_RECORD ifdef BUILD_C_RECORDMCOUNT @@ -462,8 +480,29 @@ $(sort $(subdir-obj-y)): $(subdir-ym) ; # ifdef builtin-target +ifdef CONFIG_LTO_CLANG + ifdef CONFIG_MODVERSIONS + # combine symversions for later processing + update_lto_symversions = \ + rm -f $@.symversions; \ + for i in $(filter-out FORCE,$^); do \ + if [ -f $$i.symversions ]; then \ + cat $$i.symversions \ + >> $@.symversions; \ + fi; \ + done; + endif + # rebuild the symbol table with llvm-ar to include IR files + update_lto_symtable = ; \ + mv -f $@ $@.tmp; \ + $(LLVM_AR) rcsT$(KBUILD_ARFLAGS) $@ \ + $$($(AR) t $@.tmp); \ + rm -f $@.tmp +endif + ifdef CONFIG_THIN_ARCHIVES - cmd_make_builtin = rm -f $@; $(AR) rcSTP$(KBUILD_ARFLAGS) + cmd_make_builtin = $(update_lto_symversions) \ + rm -f $@; $(AR) rcSTP$(KBUILD_ARFLAGS) cmd_make_empty_builtin = rm -f $@; $(AR) rcSTP$(KBUILD_ARFLAGS) quiet_cmd_link_o_target = AR $@ else @@ -504,7 +543,11 @@ ifdef lib-target quiet_cmd_link_l_target = AR $@ ifdef CONFIG_THIN_ARCHIVES - cmd_link_l_target = rm -f $@; $(AR) rcsTP$(KBUILD_ARFLAGS) $@ $(lib-y) + cmd_link_l_target = \ + $(update_lto_symversions) \ + rm -f $@; \ + $(AR) rcsTP$(KBUILD_ARFLAGS) $@ $(lib-y) \ + $(update_lto_symtable) else cmd_link_l_target = rm -f $@; $(AR) rcs$(KBUILD_ARFLAGS) $@ $(lib-y) endif @@ -522,14 +565,36 @@ else ref_prefix = EXTERN( endif -quiet_cmd_export_list = EXPORTS $@ -cmd_export_list = $(OBJDUMP) -h $< | \ - sed -ne '/___ksymtab/s/.*+\([^ ]*\).*/$(ref_prefix)\1)/p' >$(ksyms-lds);\ - rm -f $(dummy-object);\ +filter_export_list = sed -ne '/___ksymtab/s/.*+\([^ "]*\).*/$(ref_prefix)\1)/p' +link_export_list = rm -f $(dummy-object);\ echo | $(CC) $(a_flags) -c -o $(dummy-object) -x assembler -;\ $(LD) $(ld_flags) -r -o $@ -T $(ksyms-lds) $(dummy-object);\ rm $(dummy-object) $(ksyms-lds) +quiet_cmd_export_list = EXPORTS $@ + +ifdef CONFIG_LTO_CLANG +# objdump doesn't understand IR files and llvm-dis doesn't support archives, +# so we'll walk through each file in the archive separately +cmd_export_list = \ + rm -f $(ksyms-lds); \ + for o in $$($(AR) t $<); do \ + if $(OBJDUMP) -h $$o >/dev/null 2>/dev/null; then \ + $(OBJDUMP) -h $$o | \ + $(filter_export_list) \ + >>$(ksyms-lds); \ + else \ + $(LLVM_DIS) -o=- $$o | \ + $(filter_export_list) \ + >>$(ksyms-lds); \ + fi; \ + done; \ + $(link_export_list) +else +cmd_export_list = $(OBJDUMP) -h $< | $(filter_export_list) >$(ksyms-lds); \ + $(link_export_list) +endif + $(obj)/lib-ksyms.o: $(lib-target) FORCE $(call if_changed,export_list) @@ -557,23 +622,32 @@ cmd_link_multi-link = $(LD) $(ld_flags) -r -o $@ $(link_multi_deps) $(cmd_secana ifdef CONFIG_THIN_ARCHIVES quiet_cmd_link_multi-y = AR $@ - cmd_link_multi-y = rm -f $@; $(AR) rcSTP$(KBUILD_ARFLAGS) $@ $(link_multi_deps) + cmd_link_multi-y = $(update_lto_symversions) \ + rm -f $@; $(AR) rcSTP$(KBUILD_ARFLAGS) $@ $(link_multi_deps) \ + $(update_lto_symtable) else quiet_cmd_link_multi-y = LD $@ cmd_link_multi-y = $(cmd_link_multi-link) endif quiet_cmd_link_multi-m = LD [M] $@ -cmd_link_multi-m = $(cmd_link_multi-link) + +ifdef CONFIG_LTO_CLANG + # don't compile IR until needed + cmd_link_multi-m = $(cmd_link_multi-y) +else + cmd_link_multi-m = $(cmd_link_multi-link) +endif $(multi-used-y): FORCE $(call if_changed,link_multi-y) -$(call multi_depend, $(multi-used-y), .o, -objs -y) $(multi-used-m): FORCE $(call if_changed,link_multi-m) @{ echo $(@:.o=.ko); echo $(link_multi_deps); \ $(cmd_undef_syms); } > $(MODVERDIR)/$(@F:.o=.mod) + +$(call multi_depend, $(multi-used-y), .o, -objs -y) $(call multi_depend, $(multi-used-m), .o, -objs -y -m) targets += $(multi-used-y) $(multi-used-m) diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index 991db7d6e4df..c86c99b4a1cf 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -83,12 +83,28 @@ modpost = scripts/mod/modpost \ MODPOST_OPT=$(subst -i,-n,$(filter -i,$(MAKEFLAGS))) +# If CONFIG_LTO_CLANG is enabled, .o files are either LLVM IR, or empty, so we +# need to link them into actual objects before passing them to modpost +modpost-ext = $(if $(CONFIG_LTO_CLANG),.lto,) + +ifdef CONFIG_LTO_CLANG +quiet_cmd_cc_lto_link_modules = LD [M] $@ +cmd_cc_lto_link_modules = \ + $(LD) $(ld_flags) -r -o $(@) \ + $(shell [ -s $(@:$(modpost-ext).o=.o.symversions) ] && \ + echo -T $(@:$(modpost-ext).o=.o.symversions)) \ + --whole-archive $(filter-out FORCE,$^) + +$(modules:.ko=$(modpost-ext).o): %$(modpost-ext).o: %.o FORCE + $(call if_changed,cc_lto_link_modules) +endif + # We can go over command line length here, so be careful. quiet_cmd_modpost = MODPOST $(words $(filter-out vmlinux FORCE, $^)) modules - cmd_modpost = $(MODLISTCMD) | sed 's/\.ko$$/.o/' | $(modpost) $(MODPOST_OPT) -s -T - + cmd_modpost = $(MODLISTCMD) | sed 's/\.ko$$/$(modpost-ext)\.o/' | $(modpost) $(MODPOST_OPT) -s -T - PHONY += __modpost -__modpost: $(modules:.ko=.o) FORCE +__modpost: $(modules:.ko=$(modpost-ext).o) FORCE $(call cmd,modpost) $(wildcard vmlinux) quiet_cmd_kernel-mod = MODPOST $@ @@ -98,8 +114,7 @@ vmlinux.o: FORCE $(call cmd,kernel-mod) # Declare generated files as targets for modpost -$(modules:.ko=.mod.c): __modpost ; - +$(modules:.ko=$(modpost-ext).mod.c): __modpost ; # Step 5), compile all *.mod.c files @@ -110,22 +125,33 @@ quiet_cmd_cc_o_c = CC $@ cmd_cc_o_c = $(CC) $(c_flags) $(KBUILD_CFLAGS_MODULE) $(CFLAGS_MODULE) \ -c -o $@ $< -$(modules:.ko=.mod.o): %.mod.o: %.mod.c FORCE +$(modules:.ko=.mod.o): %.mod.o: %$(modpost-ext).mod.c FORCE $(call if_changed_dep,cc_o_c) -targets += $(modules:.ko=.mod.o) +targets += $(modules:.ko=$(modpost-ext).mod.o) ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink) # Step 6), final link of the modules with optional arch pass after final link quiet_cmd_ld_ko_o = LD [M] $@ + +ifdef CONFIG_LTO_CLANG + cmd_ld_ko_o = \ + $(LD) -r $(LDFLAGS) \ + $(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE) \ + $(shell [ -s $(@:.ko=.o.symversions) ] && \ + echo -T $(@:.ko=.o.symversions)) \ + -o $@ --whole-archive \ + $(filter-out FORCE,$(^:$(modpost-ext).o=.o)) +else cmd_ld_ko_o = \ $(LD) -r $(LDFLAGS) \ $(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE) \ -o $@ $(filter-out FORCE,$^) ; \ $(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true) +endif -$(modules): %.ko :%.o %.mod.o FORCE +$(modules): %.ko: %$(modpost-ext).o %.mod.o FORCE +$(call if_changed,ld_ko_o) targets += $(modules) diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index e6818b8e7141..ece268ceaaa1 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -61,7 +61,38 @@ archive_builtin() ${AR} rcsTP${KBUILD_ARFLAGS} built-in.o \ ${KBUILD_VMLINUX_INIT} \ ${KBUILD_VMLINUX_MAIN} + + if [ -n "${CONFIG_LTO_CLANG}" ]; then + mv -f built-in.o built-in.o.tmp + ${LLVM_AR} rcsT${KBUILD_ARFLAGS} built-in.o $(${AR} t built-in.o.tmp) + rm -f built-in.o.tmp + fi + fi +} + +# If CONFIG_LTO_CLANG is selected, collect generated symbol versions into +# .tmp_symversions +modversions() +{ + if [ -z "${CONFIG_LTO_CLANG}" ]; then + return fi + + if [ -z "${CONFIG_MODVERSIONS}" ]; then + return + fi + + rm -f .tmp_symversions + + for a in built-in.o ${KBUILD_VMLINUX_LIBS}; do + for o in $(${AR} t $a); do + if [ -f ${o}.symversions ]; then + cat ${o}.symversions >> .tmp_symversions + fi + done + done + + echo "-T .tmp_symversions" } # Link of vmlinux.o used for section mismatch analysis @@ -84,7 +115,16 @@ modpost_link() ${KBUILD_VMLINUX_LIBS} \ --end-group" fi - ${LD} ${LDFLAGS} -r -o ${1} ${objects} + + if [ -n "${CONFIG_LTO_CLANG}" ]; then + # This might take a while, so indicate that we're doing + # an LTO link + info LTO vmlinux.o + else + info LD vmlinux.o + fi + + ${LD} ${LDFLAGS} -r -o ${1} $(modversions) ${objects} } # Link of vmlinux @@ -96,8 +136,16 @@ vmlinux_link() local objects if [ "${SRCARCH}" != "um" ]; then - if [ -n "${CONFIG_THIN_ARCHIVES}" ]; then - objects="--whole-archive \ + local ld=${LD} + local ldflags="${LDFLAGS} ${LDFLAGS_vmlinux}" + + if [ -n "${LDFINAL_vmlinux}" ]; then + ld=${LDFINAL_vmlinux} + ldflags="${LDFLAGS_FINAL_vmlinux} ${LDFLAGS_vmlinux}" + fi + + if [[ -n "${CONFIG_THIN_ARCHIVES}" && -z "${CONFIG_LTO_CLANG}" ]]; then + objects="--whole-archive \ built-in.o \ --no-whole-archive \ --start-group \ @@ -113,8 +161,7 @@ vmlinux_link() ${1}" fi - ${LD} ${LDFLAGS} ${LDFLAGS_vmlinux} -o ${2} \ - -T ${lds} ${objects} + ${ld} ${ldflags} -o ${2} -T ${lds} ${objects} else if [ -n "${CONFIG_THIN_ARCHIVES}" ]; then objects="-Wl,--whole-archive \ @@ -141,7 +188,6 @@ vmlinux_link() fi } - # Create ${2} .o file with all symbols from the ${1} object file kallsyms() { @@ -192,6 +238,7 @@ cleanup() rm -f .tmp_System.map rm -f .tmp_kallsyms* rm -f .tmp_version + rm -f .tmp_symversions rm -f .tmp_vmlinux* rm -f built-in.o rm -f System.map @@ -253,12 +300,19 @@ ${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init GCC_PLUGINS_CFLAGS="${GC archive_builtin #link vmlinux.o -info LD vmlinux.o modpost_link vmlinux.o # modpost vmlinux.o to check for section mismatches ${MAKE} -f "${srctree}/scripts/Makefile.modpost" vmlinux.o +if [ -n "${CONFIG_LTO_CLANG}" ]; then + # Re-use vmlinux.o, so we can avoid the slow LTO link step in + # vmlinux_link + KBUILD_VMLINUX_INIT= + KBUILD_VMLINUX_MAIN=vmlinux.o + KBUILD_VMLINUX_LIBS= +fi + kallsymso="" kallsyms_vmlinux="" if [ -n "${CONFIG_KALLSYMS}" ]; then