diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 9e9cf03b471f..66e721a966ce 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1568,6 +1568,15 @@ initrd= [BOOT] Specify the location of the initial ramdisk + init_on_alloc= [MM] Fill newly allocated pages and heap objects with + zeroes. + Format: 0 | 1 + Default set by CONFIG_INIT_ON_ALLOC_DEFAULT_ON. + + init_on_free= [MM] Fill freed pages and heap objects with zeroes. + Format: 0 | 1 + Default set by CONFIG_INIT_ON_FREE_DEFAULT_ON. + init_pkru= [x86] Specify the default memory protection keys rights register contents for all processes. 0x55555554 by default (disallow access to all but pkey 0). Can diff --git a/include/linux/mm.h b/include/linux/mm.h index 6193bea33594..d0506e84e899 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2463,6 +2463,30 @@ static inline void kernel_poison_pages(struct page *page, int numpages, static inline bool page_is_poisoned(struct page *page) { return false; } #endif +#ifdef CONFIG_INIT_ON_ALLOC_DEFAULT_ON +DECLARE_STATIC_KEY_TRUE(init_on_alloc); +#else +DECLARE_STATIC_KEY_FALSE(init_on_alloc); +#endif +static inline bool want_init_on_alloc(gfp_t flags) +{ + if (static_branch_unlikely(&init_on_alloc) && + !page_poisoning_enabled()) + return true; + return flags & __GFP_ZERO; +} + +#ifdef CONFIG_INIT_ON_FREE_DEFAULT_ON +DECLARE_STATIC_KEY_TRUE(init_on_free); +#else +DECLARE_STATIC_KEY_FALSE(init_on_free); +#endif +static inline bool want_init_on_free(void) +{ + return static_branch_unlikely(&init_on_free) && + !page_poisoning_enabled(); +} + #ifdef CONFIG_DEBUG_PAGEALLOC extern bool _debug_pagealloc_enabled; extern void __kernel_map_pages(struct page *page, int numpages, int enable); diff --git a/mm/dmapool.c b/mm/dmapool.c index 4d90a64b2fdc..207b89212f27 100644 --- a/mm/dmapool.c +++ b/mm/dmapool.c @@ -379,7 +379,7 @@ void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags, #endif spin_unlock_irqrestore(&pool->lock, flags); - if (mem_flags & __GFP_ZERO) + if (want_init_on_alloc(mem_flags)) memset(retval, 0, pool->size); return retval; @@ -429,6 +429,8 @@ void dma_pool_free(struct dma_pool *pool, void *vaddr, dma_addr_t dma) } offset = vaddr - page->vaddr; + if (want_init_on_free()) + memset(vaddr, 0, pool->size); #ifdef DMAPOOL_DEBUG if ((dma - page->dma) != offset) { spin_unlock_irqrestore(&pool->lock, flags); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3c86ff952a03..8e35ef3e067d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -130,6 +130,55 @@ unsigned long totalcma_pages __read_mostly; int percpu_pagelist_fraction; gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK; +#ifdef CONFIG_INIT_ON_ALLOC_DEFAULT_ON +DEFINE_STATIC_KEY_TRUE(init_on_alloc); +#else +DEFINE_STATIC_KEY_FALSE(init_on_alloc); +#endif +EXPORT_SYMBOL(init_on_alloc); + +#ifdef CONFIG_INIT_ON_FREE_DEFAULT_ON +DEFINE_STATIC_KEY_TRUE(init_on_free); +#else +DEFINE_STATIC_KEY_FALSE(init_on_free); +#endif +EXPORT_SYMBOL(init_on_free); + +static int __init early_init_on_alloc(char *buf) +{ + int ret; + bool bool_result; + + if (!buf) + return -EINVAL; + ret = kstrtobool(buf, &bool_result); + if (bool_result && page_poisoning_enabled()) + pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, will take precedence over init_on_alloc\n"); + if (bool_result) + static_branch_enable(&init_on_alloc); + else + static_branch_disable(&init_on_alloc); + return ret; +} +early_param("init_on_alloc", early_init_on_alloc); + +static int __init early_init_on_free(char *buf) +{ + int ret; + bool bool_result; + + if (!buf) + return -EINVAL; + ret = kstrtobool(buf, &bool_result); + if (bool_result && page_poisoning_enabled()) + pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, will take precedence over init_on_free\n"); + if (bool_result) + static_branch_enable(&init_on_free); + else + static_branch_disable(&init_on_free); + return ret; +} +early_param("init_on_free", early_init_on_free); /* * A cached value of the page's pageblock's migratetype, used when the page is @@ -1026,6 +1075,14 @@ out: return ret; } +static void kernel_init_free_pages(struct page *page, int numpages) +{ + int i; + + for (i = 0; i < numpages; i++) + clear_highpage(page + i); +} + static __always_inline bool free_pages_prepare(struct page *page, unsigned int order, bool check_free) { @@ -1077,6 +1134,9 @@ static __always_inline bool free_pages_prepare(struct page *page, PAGE_SIZE << order); } arch_free_page(page, order); + if (want_init_on_free()) + kernel_init_free_pages(page, 1 << order); + kernel_poison_pages(page, 1 << order, 0); kernel_map_pages(page, 1 << order, 0); kasan_free_pages(page, order); @@ -1732,8 +1792,8 @@ static inline int check_new_page(struct page *page) static inline bool free_pages_prezeroed(void) { - return IS_ENABLED(CONFIG_PAGE_POISONING_ZERO) && - page_poisoning_enabled(); + return (IS_ENABLED(CONFIG_PAGE_POISONING_ZERO) && + page_poisoning_enabled()) || want_init_on_free(); } #ifdef CONFIG_DEBUG_VM @@ -1786,13 +1846,10 @@ inline void post_alloc_hook(struct page *page, unsigned int order, static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags, unsigned int alloc_flags) { - int i; - post_alloc_hook(page, order, gfp_flags); - if (!free_pages_prezeroed() && (gfp_flags & __GFP_ZERO)) - for (i = 0; i < (1 << order); i++) - clear_highpage(page + i); + if (!free_pages_prezeroed() && want_init_on_alloc(gfp_flags)) + kernel_init_free_pages(page, 1 << order); if (order && (gfp_flags & __GFP_COMP)) prep_compound_page(page, order); diff --git a/mm/slab.c b/mm/slab.c index 32aec290ab00..e8b1e6ea211f 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1897,6 +1897,14 @@ static bool set_objfreelist_slab_cache(struct kmem_cache *cachep, cachep->num = 0; + /* + * If slab auto-initialization on free is enabled, store the freelist + * off-slab, so that its contents don't end up in one of the allocated + * objects. + */ + if (unlikely(slab_want_init_on_free(cachep))) + return false; + if (cachep->ctor || flags & SLAB_TYPESAFE_BY_RCU) return false; @@ -3330,7 +3338,7 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, local_irq_restore(save_flags); ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller); - if (unlikely(flags & __GFP_ZERO) && ptr) + if (unlikely(slab_want_init_on_alloc(flags, cachep)) && ptr) memset(ptr, 0, cachep->object_size); slab_post_alloc_hook(cachep, flags, 1, &ptr); @@ -3387,7 +3395,7 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller) objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller); prefetchw(objp); - if (unlikely(flags & __GFP_ZERO) && objp) + if (unlikely(slab_want_init_on_alloc(flags, cachep)) && objp) memset(objp, 0, cachep->object_size); slab_post_alloc_hook(cachep, flags, 1, &objp); @@ -3508,6 +3516,8 @@ void ___cache_free(struct kmem_cache *cachep, void *objp, struct array_cache *ac = cpu_cache_get(cachep); check_irq_off(); + if (unlikely(slab_want_init_on_free(cachep))) + memset(objp, 0, cachep->object_size); kmemleak_free_recursive(objp, cachep->flags); objp = cache_free_debugcheck(cachep, objp, caller); @@ -3593,7 +3603,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, cache_alloc_debugcheck_after_bulk(s, flags, size, p, _RET_IP_); /* Clear memory outside IRQ disabled section */ - if (unlikely(flags & __GFP_ZERO)) + if (unlikely(slab_want_init_on_alloc(flags, s))) for (i = 0; i < size; i++) memset(p[i], 0, s->object_size); diff --git a/mm/slab.h b/mm/slab.h index 437f96c955d5..20ee67d11bb4 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -517,4 +517,24 @@ static inline int cache_random_seq_create(struct kmem_cache *cachep, static inline void cache_random_seq_destroy(struct kmem_cache *cachep) { } #endif /* CONFIG_SLAB_FREELIST_RANDOM */ +static inline bool slab_want_init_on_alloc(gfp_t flags, struct kmem_cache *c) +{ + if (static_branch_unlikely(&init_on_alloc)) { + if (c->ctor) + return false; + if (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) + return flags & __GFP_ZERO; + return true; + } + return flags & __GFP_ZERO; +} + +static inline bool slab_want_init_on_free(struct kmem_cache *c) +{ + if (static_branch_unlikely(&init_on_free)) + return !(c->ctor || + (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON))); + return false; +} + #endif /* MM_SLAB_H */ diff --git a/mm/slub.c b/mm/slub.c index a35a7d0dc1d7..5d875a7e0a72 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1300,6 +1300,10 @@ check_slabs: if (*str == ',') slub_debug_slabs = str + 1; out: + if ((static_branch_unlikely(&init_on_alloc) || + static_branch_unlikely(&init_on_free)) && + (slub_debug & SLAB_POISON)) + pr_info("mem auto-init: SLAB_POISON will take precedence over init_on_alloc/init_on_free\n"); return 1; } @@ -1406,6 +1410,28 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s, void *x) static inline bool slab_free_freelist_hook(struct kmem_cache *s, void **head, void **tail) { + + void *object; + void *next = *head; + void *old_tail = *tail ? *tail : *head; + int rsize; + + if (slab_want_init_on_free(s)) + do { + object = next; + next = get_freepointer(s, object); + /* + * Clear the object and the metadata, but don't touch + * the redzone. + */ + memset(object, 0, s->object_size); + rsize = (s->flags & SLAB_RED_ZONE) ? s->red_left_pad + : 0; + memset((char *)object + s->inuse, 0, + s->size - s->inuse - rsize); + set_freepointer(s, object, next); + } while (object != old_tail); + /* * Compiler cannot detect this function can be removed if slab_free_hook() * evaluates to nothing. Thus, catch all relevant config debug options here. @@ -1415,9 +1441,7 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, defined(CONFIG_DEBUG_OBJECTS_FREE) || \ defined(CONFIG_KASAN) - void *object; - void *next = *head; - void *old_tail = *tail ? *tail : *head; + next = *head; /* Head and tail of the reconstructed freelist */ *head = NULL; @@ -2751,8 +2775,14 @@ redo: prefetch_freepointer(s, next_object); stat(s, ALLOC_FASTPATH); } + /* + * If the object has been wiped upon free, make sure it's fully + * initialized by zeroing out freelist pointer. + */ + if (unlikely(slab_want_init_on_free(s)) && object) + memset(object + s->offset, 0, sizeof(void *)); - if (unlikely(gfpflags & __GFP_ZERO) && object) + if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object) memset(object, 0, s->object_size); slab_post_alloc_hook(s, gfpflags, 1, &object); @@ -3174,7 +3204,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, local_irq_enable(); /* Clear memory outside IRQ disabled fastpath loop */ - if (unlikely(flags & __GFP_ZERO)) { + if (unlikely(slab_want_init_on_alloc(flags, s))) { int j; for (j = 0; j < i; j++) diff --git a/net/core/sock.c b/net/core/sock.c index c8d39092e8bf..dca9432af9b9 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1461,7 +1461,7 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO); if (!sk) return sk; - if (priority & __GFP_ZERO) + if (want_init_on_alloc(priority)) sk_prot_clear_nulls(sk, prot->obj_size); } else sk = kmalloc(prot->obj_size, priority); diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening index f3784bca35ea..9cbecabd5600 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -36,6 +36,35 @@ choice endchoice +config INIT_ON_ALLOC_DEFAULT_ON + bool "Enable heap memory zeroing on allocation by default" + help + This has the effect of setting "init_on_alloc=1" on the kernel + command line. This can be disabled with "init_on_alloc=0". + When "init_on_alloc" is enabled, all page allocator and slab + allocator memory will be zeroed when allocated, eliminating + many kinds of "uninitialized heap memory" flaws, especially + heap content exposures. The performance impact varies by + workload, but most cases see <1% impact. Some synthetic + workloads have measured as high as 7%. + +config INIT_ON_FREE_DEFAULT_ON + bool "Enable heap memory zeroing on free by default" + help + This has the effect of setting "init_on_free=1" on the kernel + command line. This can be disabled with "init_on_free=0". + Similar to "init_on_alloc", when "init_on_free" is enabled, + all page allocator and slab allocator memory will be zeroed + when freed, eliminating many kinds of "uninitialized heap memory" + flaws, especially heap content exposures. The primary difference + with "init_on_free" is that data lifetime in memory is reduced, + as anything freed is wiped immediately, making live forensics or + cold boot memory attacks unable to recover freed memory contents. + The performance impact varies by workload, but is more expensive + than "init_on_alloc" due to the negative cache effects of + touching "cold" memory areas. Most cases see 3-5% impact. Some + synthetic workloads have measured as high as 8%. + endmenu endmenu