@@ -115,6 +115,7 @@
#include <linux/reciprocal_div.h>
#include <linux/debugobjects.h>
#include <linux/kmemcheck.h>
#include <linux/memory.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
@@ -144,30 +145,6 @@
#define	BYTES_PER_WORD		sizeof(void *)
#define	REDZONE_ALIGN		max(BYTES_PER_WORD, __alignof__(unsigned long long))

#ifndef ARCH_KMALLOC_MINALIGN
/*
 * Enforce a minimum alignment for the kmalloc caches.
 * Usually, the kmalloc caches are cache_line_size() aligned, except when
 * DEBUG and FORCED_DEBUG are enabled, then they are BYTES_PER_WORD aligned.
 * Some archs want to perform DMA into kmalloc caches and need a guaranteed
 * alignment larger than the alignment of a 64-bit integer.
 * ARCH_KMALLOC_MINALIGN allows that.
 * Note that increasing this value may disable some debug features.
 */
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
#endif

#ifndef ARCH_SLAB_MINALIGN
/*
 * Enforce a minimum alignment for all caches.
 * Intended for archs that get misalignment faults even for BYTES_PER_WORD
 * aligned buffers. Includes ARCH_KMALLOC_MINALIGN.
 * If possible: Do not enable this flag for CONFIG_DEBUG_SLAB, it disables
 * some debug features.
 */
#define ARCH_SLAB_MINALIGN 0
#endif

#ifndef ARCH_KMALLOC_FLAGS
#define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
#endif
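A note on how ARCH_KMALLOC_MINALIGN is raised in practice: an architecture that performs DMA straight into kmalloc() memory overrides the minimum in its own headers rather than here. As a rough illustration only (not part of this hunk; ARM-class ports with non-coherent DMA of this era do essentially this):

	/* arch header, illustrative: make kmalloc() buffers DMA-safe */
	#define ARCH_KMALLOC_MINALIGN	L1_CACHE_BYTES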
@@ -1102,6 +1079,52 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
}
#endif

/*
 * Allocates and initializes nodelists for a node on each slab cache, used for
 * either memory or cpu hotplug.  If memory is being hot-added, the kmem_list3
 * will be allocated off-node since memory is not yet online for the new node.
 * When hotplugging memory or a cpu, existing nodelists are not replaced if
 * already in use.
 *
 * Must hold cache_chain_mutex.
 */
static int init_cache_nodelists_node(int node)
{
	struct kmem_cache *cachep;
	struct kmem_list3 *l3;
	const int memsize = sizeof(struct kmem_list3);

	list_for_each_entry(cachep, &cache_chain, next) {
		/*
		 * Set up the size64 kmemlist for cpu before we can
		 * begin anything. Make sure some other cpu on this
		 * node has not already allocated this
		 */
		if (!cachep->nodelists[node]) {
			l3 = kmalloc_node(memsize, GFP_KERNEL, node);
			if (!l3)
				return -ENOMEM;
			kmem_list3_init(l3);
			l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
			    ((unsigned long)cachep) % REAPTIMEOUT_LIST3;

			/*
			 * The l3s don't come and go as CPUs come and
			 * go.  cache_chain_mutex is sufficient
			 * protection here.
			 */
			cachep->nodelists[node] = l3;
		}

		spin_lock_irq(&cachep->nodelists[node]->list_lock);
		cachep->nodelists[node]->free_limit =
			(1 + nr_cpus_node(node)) *
			cachep->batchcount + cachep->num;
		spin_unlock_irq(&cachep->nodelists[node]->list_lock);
	}
	return 0;
}
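To get a feel for the free_limit computation above, with made-up numbers: a node with 4 CPUs and a cache with batchcount = 16 and num = 32 objects per slab ends up with free_limit = (1 + 4) * 16 + 32 = 112, i.e. up to 112 free objects may sit on that node's lists before surplus slabs start being handed back to the page allocator.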
static void __cpuinit cpuup_canceled(long cpu)
{
	struct kmem_cache *cachep;
@@ -1172,7 +1195,7 @@ static int __cpuinit cpuup_prepare(long cpu)
	struct kmem_cache *cachep;
	struct kmem_list3 *l3 = NULL;
	int node = cpu_to_node(cpu);
	const int memsize = sizeof(struct kmem_list3);
	int err;

	/*
	 * We need to do this right in the beginning since
@@ -1180,35 +1203,9 @@ static int __cpuinit cpuup_prepare(long cpu)
	 * kmalloc_node allows us to add the slab to the right
	 * kmem_list3 and not this cpu's kmem_list3
	 */
	list_for_each_entry(cachep, &cache_chain, next) {
		/*
		 * Set up the size64 kmemlist for cpu before we can
		 * begin anything. Make sure some other cpu on this
		 * node has not already allocated this
		 */
		if (!cachep->nodelists[node]) {
			l3 = kmalloc_node(memsize, GFP_KERNEL, node);
			if (!l3)
				goto bad;
			kmem_list3_init(l3);
			l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
			    ((unsigned long)cachep) % REAPTIMEOUT_LIST3;

			/*
			 * The l3s don't come and go as CPUs come and
			 * go.  cache_chain_mutex is sufficient
			 * protection here.
			 */
			cachep->nodelists[node] = l3;
		}

		spin_lock_irq(&cachep->nodelists[node]->list_lock);
		cachep->nodelists[node]->free_limit =
			(1 + nr_cpus_node(node)) *
			cachep->batchcount + cachep->num;
		spin_unlock_irq(&cachep->nodelists[node]->list_lock);
	}
	err = init_cache_nodelists_node(node);
	if (err < 0)
		goto bad;

	/*
	 * Now we can go ahead with allocating the shared arrays and
@@ -1331,11 +1328,75 @@ static struct notifier_block __cpuinitdata cpucache_notifier = {
	&cpuup_callback, NULL, 0
};

#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
/*
 * Drains freelist for a node on each slab cache, used for memory hot-remove.
 * Returns -EBUSY if all objects cannot be drained so that the node is not
 * removed.
 *
 * Must hold cache_chain_mutex.
 */
static int __meminit drain_cache_nodelists_node(int node)
{
	struct kmem_cache *cachep;
	int ret = 0;

	list_for_each_entry(cachep, &cache_chain, next) {
		struct kmem_list3 *l3;

		l3 = cachep->nodelists[node];
		if (!l3)
			continue;

		drain_freelist(cachep, l3, l3->free_objects);

		if (!list_empty(&l3->slabs_full) ||
		    !list_empty(&l3->slabs_partial)) {
			ret = -EBUSY;
			break;
		}
	}
	return ret;
}

static int __meminit slab_memory_callback(struct notifier_block *self,
					unsigned long action, void *arg)
{
	struct memory_notify *mnb = arg;
	int ret = 0;
	int nid;

	nid = mnb->status_change_nid;
	if (nid < 0)
		goto out;

	switch (action) {
	case MEM_GOING_ONLINE:
		mutex_lock(&cache_chain_mutex);
		ret = init_cache_nodelists_node(nid);
		mutex_unlock(&cache_chain_mutex);
		break;
	case MEM_GOING_OFFLINE:
		mutex_lock(&cache_chain_mutex);
		ret = drain_cache_nodelists_node(nid);
		mutex_unlock(&cache_chain_mutex);
		break;
	case MEM_ONLINE:
	case MEM_OFFLINE:
	case MEM_CANCEL_ONLINE:
	case MEM_CANCEL_OFFLINE:
		break;
	}
out:
	return ret ? notifier_from_errno(ret) : NOTIFY_OK;
}
#endif /* CONFIG_NUMA && CONFIG_MEMORY_HOTPLUG */
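Why the return value is wrapped in notifier_from_errno(): MEM_GOING_OFFLINE is the one event this callback can veto, and the hotplug core only honours the veto if the errno is folded into the notifier return code. On the caller side the core unpacks it again, roughly like this (a sketch of the existing offline path, not part of this patch):

	/* memory hotplug core, approximately: */
	ret = memory_notify(MEM_GOING_OFFLINE, &arg);
	ret = notifier_to_errno(ret);
	if (ret)
		goto failed_removal;	/* offline aborted; MEM_CANCEL_OFFLINE is then sent */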
/*
 * swap the static kmem_list3 with kmalloced memory
 */
static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
			int nodeid)
static void __init init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
			int nodeid)
{
	struct kmem_list3 *ptr;
@@ -1580,6 +1641,14 @@ void __init kmem_cache_init_late(void)
	 */
	register_cpu_notifier(&cpucache_notifier);

#ifdef CONFIG_NUMA
	/*
	 * Register a memory hotplug callback that initializes and frees
	 * nodelists.
	 */
	hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
#endif

	/*
	 * The reap timers are started later, with a module init call: That part
	 * of the kernel is not yet operational.
@@ -2220,8 +2289,8 @@ kmem_cache_create (const char *name, size_t size, size_t align,
	if (ralign < align) {
		ralign = align;
	}
	/* disable debug if necessary */
	if (ralign > __alignof__(unsigned long long))
	/* disable debug if not aligning with REDZONE_ALIGN */
	if (ralign & (__alignof__(unsigned long long) - 1))
		flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);

	/*
	 * 4) Store it.
@@ -2247,8 +2316,8 @@ kmem_cache_create (const char *name, size_t size, size_t align,
	 */
	if (flags & SLAB_RED_ZONE) {
		/* add space for red zone words */
		cachep->obj_offset += sizeof(unsigned long long);
		size += 2 * sizeof(unsigned long long);
		cachep->obj_offset += align;
		size += align + sizeof(unsigned long long);
	}
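A worked example of the red zone layout change, with illustrative numbers: suppose align = 16 (say, a DMA minimum) while __alignof__(unsigned long long) is 8. The old code placed the object at offset 8, so the leading red zone word silently broke the requested 16-byte alignment. The new code reserves a full align-sized slot in front (obj_offset += 16) plus one 8-byte word behind, so size grows by 16 + 8 = 24 and the object itself stays 16-byte aligned.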
	if (flags & SLAB_STORE_USER) {
		/* user store requires one word storage behind the end of
@@ -4216,10 +4285,11 @@ static int s_show(struct seq_file *m, void *p)
		unsigned long node_frees = cachep->node_frees;
		unsigned long overflows = cachep->node_overflow;

		seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \
				%4lu %4lu %4lu %4lu %4lu", allocs, high, grown,
				reaped, errors, max_freeable, node_allocs,
				node_frees, overflows);
		seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu "
			   "%4lu %4lu %4lu %4lu %4lu",
			   allocs, high, grown,
			   reaped, errors, max_freeable, node_allocs,
			   node_frees, overflows);
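The reason for the seq_printf() rewrite: with the old backslash continuation, the tabs indenting the wrapped source line were part of the format string itself, so that indentation leaked into the /proc/slabinfo output. Splitting the format into adjacent string literals, which C concatenates at compile time, keeps the source wrapped without putting the whitespace into the string.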
}
/* cpu stats */
{