diff --git a/Makefile b/Makefile
index 3c976a69ca2b..67e96818a79a 100644
--- a/Makefile
+++ b/Makefile
@@ -377,6 +377,7 @@ endif
 # Make variables (CC, etc...)
 AS = $(CROSS_COMPILE)as
 LD = $(CROSS_COMPILE)ld
+LDGOLD = $(CROSS_COMPILE)ld.gold
 CC = $(CROSS_COMPILE)gcc
 CPP = $(CC) -E
 AR = $(CROSS_COMPILE)ar
@@ -636,6 +637,20 @@ CFLAGS_GCOV := -fprofile-arcs -ftest-coverage -fno-tree-loop-im $(call cc-disabl
 CFLAGS_KCOV := $(call cc-option,-fsanitize-coverage=trace-pc,)
 export CFLAGS_GCOV CFLAGS_KCOV
 
+# Switch the toolchain before arch/$(SRCARCH)/Makefile is included, so that
+# the ar/cc/ld-* macros return correct values.
+ifdef CONFIG_LTO_CLANG
+# LTO links go through GNU gold with LLVMgold; vmlinux_link keeps using LD
+LDFINAL_vmlinux := $(LD)
+LD := $(LDGOLD)
+LDFLAGS += -plugin LLVMgold.so
+# llvm-ar builds symbol tables from IR files, and llvm-dis stands in for
+# objdump when processing symbol versions and exports
+LLVM_AR := llvm-ar
+LLVM_DIS := llvm-dis
+export LLVM_AR LLVM_DIS
+endif
+
 # The arch Makefile can set ARCH_{CPP,A,C}FLAGS to override the default
 # values of the respective KBUILD_* variables
 ARCH_CPPFLAGS :=
@@ -794,6 +809,26 @@ KBUILD_CFLAGS += $(call cc-option,-ffunction-sections,)
 KBUILD_CFLAGS += $(call cc-option,-fdata-sections,)
 endif
 
+ifdef CONFIG_LTO_CLANG
+lto-clang-flags := -flto -fvisibility=hidden
+
+# flags for turning off only clang LTO where needed
+DISABLE_LTO_CLANG := -fno-lto -fvisibility=default
+export DISABLE_LTO_CLANG
+endif
+
+ifdef CONFIG_LTO
+lto-flags := $(lto-clang-flags)
+KBUILD_CFLAGS += $(lto-flags)
+
+DISABLE_LTO := $(DISABLE_LTO_CLANG)
+export DISABLE_LTO
+
+# Setting LDFINAL_vmlinux and LDFLAGS_FINAL_vmlinux overrides
+# the linker and flags for vmlinux_link.
+export LDFINAL_vmlinux LDFLAGS_FINAL_vmlinux
+endif
+
 # arch Makefile may override CC so keep this after arch Makefile is included
 NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)
 CHECKFLAGS += $(NOSTDINC_FLAGS)
@@ -1111,6 +1146,22 @@ prepare-objtool: $(objtool_target)
 # CC_STACKPROTECTOR_STRONG! Why did it build with _REGULAR?!")
 PHONY += prepare-compiler-check
 prepare-compiler-check: FORCE
+# LTO_CLANG only works with a supported toolchain; check clang and gold
+ifdef CONFIG_LTO_CLANG
+  ifneq ($(call clang-ifversion, -ge, 0500, y), y)
+	@echo Cannot use CONFIG_LTO_CLANG: requires clang 5.0 or later >&2 && exit 1
+  endif
+  ifneq ($(call gold-ifversion, -ge, 112000000, y), y)
+	@echo Cannot use CONFIG_LTO_CLANG: requires GNU gold 1.12 or later >&2 && exit 1
+  endif
+endif
+# Fail early if the compiler does not accept the LTO flags
+ifdef lto-flags
+  ifeq ($(call cc-option, $(lto-flags)),)
+	@echo Cannot use CONFIG_LTO: $(lto-flags) not supported by compiler \
+		>&2 && exit 1
+  endif
+endif
 # Make sure compiler supports requested stack protector flag.
 ifdef stackp-name
   ifeq ($(call cc-option, $(stackp-flag)),)
@@ -1584,7 +1635,8 @@ clean: $(clean-dirs)
 		-o -name modules.builtin -o -name '.tmp_*.o.*' \
 		-o -name '*.c.[012]*.*' \
 		-o -name '*.ll' \
-		-o -name '*.gcno' \) -type f -print | xargs rm -f
+		-o -name '*.gcno' \
+		-o -name '*.*.symversions' \) -type f -print | xargs rm -f
 
 # Generate tags for editors
 # ---------------------------------------------------------------------------
diff --git a/arch/Kconfig b/arch/Kconfig
index 400b9e1b2f27..e59db803807f 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -605,6 +605,46 @@ config LD_DEAD_CODE_DATA_ELIMINATION
 	  sections (e.g., '.text.init'). Typically '.' in section names
 	  is used to distinguish them from label names / C identifiers.
 
+config LTO
+	def_bool n
+
+config ARCH_SUPPORTS_LTO_CLANG
+	bool
+	help
+	  An architecture should select this option if it supports:
+	  - compiling with clang,
+	  - compiling inline assembly with clang's integrated assembler,
+	  - and linking with either lld or GNU gold w/ LLVMgold.
+
+choice
+	prompt "Link-Time Optimization (LTO) (EXPERIMENTAL)"
+	default LTO_NONE
+	help
+	  This option turns on Link-Time Optimization (LTO).
+
+config LTO_NONE
+	bool "None"
+
+config LTO_CLANG
+	bool "Use clang Link Time Optimization (LTO) (EXPERIMENTAL)"
+	depends on ARCH_SUPPORTS_LTO_CLANG
+	depends on !FTRACE_MCOUNT_RECORD
+	select LTO
+	select THIN_ARCHIVES
+	select LD_DEAD_CODE_DATA_ELIMINATION
+	help
+	  This option enables clang's Link Time Optimization (LTO), which allows
+	  the compiler to optimize the kernel globally at link time. If you
+	  enable this option, the compiler generates LLVM IR instead of object
+	  files, and the actual compilation from IR occurs at the LTO link step,
+	  which may take several minutes.
+
+	  If you select this option, you must compile the kernel with clang >=
+	  5.0 (make CC=clang) and GNU gold from binutils >= 2.27, and have the
+	  LLVMgold plug-in in LD_LIBRARY_PATH.
+
+endchoice
+
 config HAVE_ARCH_WITHIN_STACK_FRAMES
 	bool
 	help
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 7143da06d702..bda756f438f1 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -210,6 +210,23 @@ else
 
 cmd_cc_o_c = $(CC) $(c_flags) -c -o $(@D)/.tmp_$(@F) $<
 
+ifdef CONFIG_LTO_CLANG
+# Generate .o.symversions files for each .o with exported symbols, and link these
+# to the kernel and/or modules at the end.
+cmd_modversions_c = \
+	if $(OBJDUMP) -h $(@D)/.tmp_$(@F) >/dev/null 2>/dev/null; then \
+		if $(OBJDUMP) -h $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \
+			$(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \
+				> $(@D)/$(@F).symversions; \
+		fi; \
+	else \
+		if $(LLVM_DIS) -o=- $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \
+			$(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \
+				> $(@D)/$(@F).symversions; \
+		fi; \
+	fi; \
+	mv -f $(@D)/.tmp_$(@F) $@;
+else
 cmd_modversions_c = \
 	if $(OBJDUMP) -h $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \
 		$(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \
@@ -222,6 +239,7 @@ cmd_modversions_c = \
 		mv -f $(@D)/.tmp_$(@F) $@; \
 	fi;
 endif
+endif
 
 ifdef CONFIG_FTRACE_MCOUNT_RECORD
 ifdef BUILD_C_RECORDMCOUNT
@@ -462,8 +480,29 @@ $(sort $(subdir-obj-y)): $(subdir-ym) ;
 #
 ifdef builtin-target
 
+ifdef CONFIG_LTO_CLANG
+  ifdef CONFIG_MODVERSIONS
+    # gather the per-object symversions into one file for later processing
+    update_lto_symversions = \
+	rm -f $@.symversions; \
+	for obj in $(filter-out FORCE,$^); do \
+		if [ -f $$obj.symversions ]; then \
+			cat $$obj.symversions \
+				>> $@.symversions; \
+		fi; \
+	done;
+  endif
+  # re-create the archive with llvm-ar so its symbol table covers IR files
+  update_lto_symtable = ; \
+	mv -f $@ $@.tmp; \
+	$(LLVM_AR) rcsT$(KBUILD_ARFLAGS) $@ \
+		$$($(AR) t $@.tmp); \
+	rm -f $@.tmp
+endif
+
 ifdef CONFIG_THIN_ARCHIVES
-  cmd_make_builtin = rm -f $@; $(AR) rcSTP$(KBUILD_ARFLAGS)
+  cmd_make_builtin = $(update_lto_symversions) \
+	rm -f $@; $(AR) rcSTP$(KBUILD_ARFLAGS)
   cmd_make_empty_builtin = rm -f $@; $(AR) rcSTP$(KBUILD_ARFLAGS)
   quiet_cmd_link_o_target = AR $@
 else
@@ -504,7 +543,11 @@ ifdef lib-target
 quiet_cmd_link_l_target = AR $@
 
 ifdef CONFIG_THIN_ARCHIVES
-  cmd_link_l_target = rm -f $@; $(AR) rcsTP$(KBUILD_ARFLAGS) $@ $(lib-y)
+  cmd_link_l_target = \
+	$(update_lto_symversions) \
+	rm -f $@; \
+	$(AR) rcsTP$(KBUILD_ARFLAGS) $@ $(lib-y) \
+	$(update_lto_symtable)
 else
   cmd_link_l_target = rm -f $@; $(AR) 
rcs$(KBUILD_ARFLAGS) $@ $(lib-y)
 endif
@@ -522,14 +565,37 @@ else
 ref_prefix = EXTERN(
 endif
 
-quiet_cmd_export_list = EXPORTS $@
-cmd_export_list = $(OBJDUMP) -h $< | \
-	sed -ne '/___ksymtab/s/.*+\([^ ]*\).*/$(ref_prefix)\1)/p' >$(ksyms-lds);\
-	rm -f $(dummy-object);\
+filter_export_list = sed -ne '/___ksymtab/s/.*+\([^ "]*\).*/$(ref_prefix)\1)/p'
+link_export_list = rm -f $(dummy-object);\
 	echo | $(CC) $(a_flags) -c -o $(dummy-object) -x assembler -;\
 	$(LD) $(ld_flags) -r -o $@ -T $(ksyms-lds) $(dummy-object);\
 	rm $(dummy-object) $(ksyms-lds)
 
+quiet_cmd_export_list = EXPORTS $@
+
+ifdef CONFIG_LTO_CLANG
+# objdump doesn't understand IR files and llvm-dis doesn't support archives,
+# so we'll walk through each file in the archive separately. $(ksyms-lds) is
+# created empty up front so that an archive without members still links.
+cmd_export_list = \
+	: > $(ksyms-lds); \
+	for o in $$($(AR) t $<); do \
+		if $(OBJDUMP) -h $$o >/dev/null 2>/dev/null; then \
+			$(OBJDUMP) -h $$o | \
+				$(filter_export_list) \
+				>>$(ksyms-lds); \
+		else \
+			$(LLVM_DIS) -o=- $$o | \
+				$(filter_export_list) \
+				>>$(ksyms-lds); \
+		fi; \
+	done; \
+	$(link_export_list)
+else
+cmd_export_list = $(OBJDUMP) -h $< | $(filter_export_list) >$(ksyms-lds); \
+	$(link_export_list)
+endif
+
 $(obj)/lib-ksyms.o: $(lib-target) FORCE
 	$(call if_changed,export_list)
 
@@ -557,23 +623,32 @@ cmd_link_multi-link = $(LD) $(ld_flags) -r -o $@ $(link_multi_deps) $(cmd_secana
 
 ifdef CONFIG_THIN_ARCHIVES
   quiet_cmd_link_multi-y = AR $@
-  cmd_link_multi-y = rm -f $@; $(AR) rcSTP$(KBUILD_ARFLAGS) $@ $(link_multi_deps)
+  cmd_link_multi-y = $(update_lto_symversions) \
+	rm -f $@; $(AR) rcSTP$(KBUILD_ARFLAGS) $@ $(link_multi_deps) \
+	$(update_lto_symtable)
 else
   quiet_cmd_link_multi-y = LD $@
   cmd_link_multi-y = $(cmd_link_multi-link)
 endif
 
 quiet_cmd_link_multi-m = LD [M] $@
-cmd_link_multi-m = $(cmd_link_multi-link)
+
+ifdef CONFIG_LTO_CLANG
+  # don't compile IR until needed
+  cmd_link_multi-m = $(cmd_link_multi-y)
+else
+  cmd_link_multi-m = $(cmd_link_multi-link)
+endif
 
 $(multi-used-y): FORCE
 	$(call 
if_changed,link_multi-y)
-$(call multi_depend, $(multi-used-y), .o, -objs -y)
 
 $(multi-used-m): FORCE
 	$(call if_changed,link_multi-m)
 	@{ echo $(@:.o=.ko); echo $(link_multi_deps); \
 	   $(cmd_undef_syms); } > $(MODVERDIR)/$(@F:.o=.mod)
+
+$(call multi_depend, $(multi-used-y), .o, -objs -y)
 $(call multi_depend, $(multi-used-m), .o, -objs -y -m)
 
 targets += $(multi-used-y) $(multi-used-m)
diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
index 991db7d6e4df..c86c99b4a1cf 100644
--- a/scripts/Makefile.modpost
+++ b/scripts/Makefile.modpost
@@ -83,12 +83,30 @@ modpost = scripts/mod/modpost \
 
 MODPOST_OPT=$(subst -i,-n,$(filter -i,$(MAKEFLAGS)))
 
+# If CONFIG_LTO_CLANG is enabled, .o files are either LLVM IR, or empty, so we
+# need to link them into actual objects before passing them to modpost
+modpost-ext = $(if $(CONFIG_LTO_CLANG),.lto,)
+
+ifdef CONFIG_LTO_CLANG
+quiet_cmd_cc_lto_link_modules = LD [M] $@
+cmd_cc_lto_link_modules = \
+	$(LD) $(ld_flags) -r -o $(@) \
+		$(shell [ -s $(@:$(modpost-ext).o=.o.symversions) ] && \
+			echo -T $(@:$(modpost-ext).o=.o.symversions)) \
+		--whole-archive $(filter-out FORCE,$^)
+
+$(modules:.ko=$(modpost-ext).o): %$(modpost-ext).o: %.o FORCE
+	$(call if_changed,cc_lto_link_modules)
+# if_changed only reads the saved command line of files listed in $(targets)
+targets += $(modules:.ko=$(modpost-ext).o)
+endif
+
 # We can go over command line length here, so be careful.
quiet_cmd_modpost = MODPOST $(words $(filter-out vmlinux FORCE, $^)) modules
-      cmd_modpost = $(MODLISTCMD) | sed 's/\.ko$$/.o/' | $(modpost) $(MODPOST_OPT) -s -T -
+      cmd_modpost = $(MODLISTCMD) | sed 's/\.ko$$/$(modpost-ext)\.o/' | $(modpost) $(MODPOST_OPT) -s -T -
 
 PHONY += __modpost
-__modpost: $(modules:.ko=.o) FORCE
+__modpost: $(modules:.ko=$(modpost-ext).o) FORCE
 	$(call cmd,modpost) $(wildcard vmlinux)
 
 quiet_cmd_kernel-mod = MODPOST $@
@@ -98,8 +114,7 @@ vmlinux.o: FORCE
 	$(call cmd,kernel-mod)
 
 # Declare generated files as targets for modpost
-$(modules:.ko=.mod.c): __modpost ;
-
+$(modules:.ko=$(modpost-ext).mod.c): __modpost ;
 
 # Step 5), compile all *.mod.c files
 
@@ -110,22 +125,34 @@ quiet_cmd_cc_o_c = CC $@
       cmd_cc_o_c = $(CC) $(c_flags) $(KBUILD_CFLAGS_MODULE) $(CFLAGS_MODULE) \
 		   -c -o $@ $<
 
-$(modules:.ko=.mod.o): %.mod.o: %.mod.c FORCE
+$(modules:.ko=.mod.o): %.mod.o: %$(modpost-ext).mod.c FORCE
 	$(call if_changed_dep,cc_o_c)
 
 targets += $(modules:.ko=.mod.o)
 
 ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink)
 
 # Step 6), final link of the modules with optional arch pass after final link
 quiet_cmd_ld_ko_o = LD [M] $@
+
+ifdef CONFIG_LTO_CLANG
+      cmd_ld_ko_o = \
+	$(LD) -r $(LDFLAGS) \
+		$(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE) \
+		$(shell [ -s $(@:.ko=.o.symversions) ] && \
+			echo -T $(@:.ko=.o.symversions)) \
+		-o $@ --whole-archive \
+		$(filter-out FORCE,$(^:$(modpost-ext).o=.o)) ; \
+	$(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true)
+else
       cmd_ld_ko_o = \
 	$(LD) -r $(LDFLAGS) \
 		$(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE) \
 	-o $@ $(filter-out FORCE,$^) ; \
 	$(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true)
+endif
 
-$(modules): %.ko :%.o %.mod.o FORCE
+$(modules): %.ko: %$(modpost-ext).o %.mod.o FORCE
 	+$(call if_changed,ld_ko_o)
 
 targets += $(modules)
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index e6818b8e7141..ece268ceaaa1 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -61,7 +61,38 @@
archive_builtin()
 		${AR} rcsTP${KBUILD_ARFLAGS} built-in.o \
 			${KBUILD_VMLINUX_INIT} \
 			${KBUILD_VMLINUX_MAIN}
+
+		if [ -n "${CONFIG_LTO_CLANG}" ]; then
+			mv -f built-in.o built-in.o.tmp
+			${LLVM_AR} rcsT${KBUILD_ARFLAGS} built-in.o $(${AR} t built-in.o.tmp)
+			rm -f built-in.o.tmp
+		fi
+	fi
+}
+
+# If CONFIG_LTO_CLANG is selected, collect generated symbol versions into
+# .tmp_symversions (always created, so the -T argument printed below is valid)
+modversions()
+{
+	if [ -z "${CONFIG_LTO_CLANG}" ]; then
+		return
 	fi
+
+	if [ -z "${CONFIG_MODVERSIONS}" ]; then
+		return
+	fi
+
+	: > .tmp_symversions
+
+	for a in built-in.o ${KBUILD_VMLINUX_LIBS}; do
+		for o in $(${AR} t $a); do
+			if [ -f ${o}.symversions ]; then
+				cat ${o}.symversions >> .tmp_symversions
+			fi
+		done
+	done
+
+	echo "-T .tmp_symversions"
 }
 
 # Link of vmlinux.o used for section mismatch analysis
@@ -84,7 +115,16 @@ modpost_link()
 			${KBUILD_VMLINUX_LIBS} \
 			--end-group"
 	fi
-	${LD} ${LDFLAGS} -r -o ${1} ${objects}
+
+	if [ -n "${CONFIG_LTO_CLANG}" ]; then
+		# This might take a while, so indicate that we're doing
+		# an LTO link
+		info LTO vmlinux.o
+	else
+		info LD vmlinux.o
+	fi
+
+	${LD} ${LDFLAGS} -r -o ${1} $(modversions) ${objects}
 }
 
 # Link of vmlinux
@@ -96,8 +136,16 @@ vmlinux_link()
 	local objects
 
 	if [ "${SRCARCH}" != "um" ]; then
-		if [ -n "${CONFIG_THIN_ARCHIVES}" ]; then
-			objects="--whole-archive \
+		local ld="${LD}"
+		local ldflags="${LDFLAGS} ${LDFLAGS_vmlinux}"
+
+		if [ -n "${LDFINAL_vmlinux}" ]; then
+			ld="${LDFINAL_vmlinux}"
+			ldflags="${LDFLAGS_FINAL_vmlinux} ${LDFLAGS_vmlinux}"
+		fi
+
+		if [ -n "${CONFIG_THIN_ARCHIVES}" ] && [ -z "${CONFIG_LTO_CLANG}" ]; then
+			objects="--whole-archive \
 				built-in.o \
 				--no-whole-archive \
 				--start-group \
@@ -113,8 +161,7 @@ vmlinux_link()
 				${1}"
 		fi
 
-		${LD} ${LDFLAGS} ${LDFLAGS_vmlinux} -o ${2} \
-			-T ${lds} ${objects}
+		${ld} ${ldflags} -o ${2} -T ${lds} ${objects}
 	else
 		if [ -n "${CONFIG_THIN_ARCHIVES}" ]; then
 			objects="-Wl,--whole-archive \
@@ -141,7 +188,6 @@ vmlinux_link()
 	fi
 }
 
-
 # Create ${2} .o file with all symbols 
from the ${1} object file
 kallsyms()
 {
@@ -192,6 +238,7 @@ cleanup()
 	rm -f .tmp_System.map
 	rm -f .tmp_kallsyms*
 	rm -f .tmp_version
+	rm -f .tmp_symversions
 	rm -f .tmp_vmlinux*
 	rm -f built-in.o
 	rm -f System.map
@@ -253,12 +300,19 @@ ${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init GCC_PLUGINS_CFLAGS="${GC
 archive_builtin
 
 #link vmlinux.o
-info LD vmlinux.o
 modpost_link vmlinux.o
 
 # modpost vmlinux.o to check for section mismatches
 ${MAKE} -f "${srctree}/scripts/Makefile.modpost" vmlinux.o
 
+if [ -n "${CONFIG_LTO_CLANG}" ]; then
+	# Hand vmlinux.o to vmlinux_link in place of the archives, which
+	# avoids repeating the slow LTO link step there
+	KBUILD_VMLINUX_INIT=
+	KBUILD_VMLINUX_MAIN=vmlinux.o
+	KBUILD_VMLINUX_LIBS=
+fi
+
 kallsymso=""
 kallsyms_vmlinux=""
 if [ -n "${CONFIG_KALLSYMS}" ]; then