@@ -4727,28 +4727,17 @@ out_unlock:
#ifdef CONFIG_NO_HZ
/*
 * idle load balancing details
 * - One of the idle CPUs nominates itself as idle load_balancer, while
 *   entering idle.
 * - This idle load balancer CPU will also go into tickless mode when
 *   it is idle, just like all other idle CPUs
 * - When one of the busy CPUs notices that there may be an idle rebalancing
 *   needed, it will kick the idle load balancer, which then does idle
 *   load balancing for all the idle CPUs.
 */
static struct {
	atomic_t load_balancer;
	atomic_t first_pick_cpu;
	atomic_t second_pick_cpu;
	cpumask_var_t idle_cpus_mask;
	cpumask_var_t grp_idle_mask;
	atomic_t nr_cpus;
	unsigned long next_balance;     /* in jiffy units */
} nohz ____cacheline_aligned;

int get_nohz_load_balancer(void)
{
	return atomic_read(&nohz.load_balancer);
}
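/*
 * Editor's illustrative sketch, not part of this patch: once the
 * load_balancer/first_pick_cpu/second_pick_cpu nominations go away, the
 * only shared nohz state left is idle_cpus_mask plus the nr_cpus count
 * maintained by select_nohz_load_balancer(). A hypothetical helper that
 * consumes this state could look like the following (the real checks are
 * open-coded in nohz_kick_needed() and nohz_balancer_kick() further down):
 */
static inline int nohz_any_idle_cpus(void)	/* hypothetical, for illustration */
{
	/* non-zero iff some cpu stopped its tick and registered itself */
	return atomic_read(&nohz.nr_cpus) != 0;
}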
#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
/**
 * lowest_flag_domain - Return lowest sched_domain containing flag.
@@ -4825,9 +4814,9 @@ static inline int is_semi_idle_group(struct sched_group *ilb_group)
 */
static int find_new_ilb(int cpu)
{
	int ilb = cpumask_first(nohz.idle_cpus_mask);
	struct sched_domain *sd;
	struct sched_group *ilb_group;
	int ilb = nr_cpu_ids;

	/*
	 * Have idle load balancer selection from semi-idle packages only
@@ -4881,13 +4870,10 @@ static void nohz_balancer_kick(int cpu)
	nohz.next_balance++;

	ilb_cpu = get_nohz_load_balancer();
	ilb_cpu = find_new_ilb(cpu);

	if (ilb_cpu >= nr_cpu_ids) {
		ilb_cpu = cpumask_first(nohz.idle_cpus_mask);
		if (ilb_cpu >= nr_cpu_ids)
			return;
	}
	if (ilb_cpu >= nr_cpu_ids)
		return;

	if (test_and_set_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu)))
		return;
@@ -4932,77 +4918,20 @@ void set_cpu_sd_state_idle(void)
}

/*
 * This routine will try to nominate the ilb (idle load balancing)
 * owner among the cpus whose ticks are stopped. ilb owner will do the idle
 * load balancing on behalf of all those cpus.
 *
 * When the ilb owner becomes busy, we will not have a new ilb owner until
 * some idle CPU wakes up and goes back to idle or some busy CPU tries to
 * kick idle load balancing by kicking one of the idle CPUs.
 *
 * Ticks are stopped for the ilb owner as well, with the busy CPU kicking
 * this ilb owner CPU in the future (when there is a need for idle load
 * balancing on behalf of all idle CPUs).
 * This routine will record that this cpu is going idle with tick stopped.
 * This info will be used in performing idle load balancing in the future.
 */
void select_nohz_load_balancer(int stop_tick)
{
	int cpu = smp_processor_id();

	if (stop_tick) {
		if (!cpu_active(cpu)) {
			if (atomic_read(&nohz.load_balancer) != cpu)
				return;

			/*
			 * If we are going offline and still the leader,
			 * give up!
			 */
			if (atomic_cmpxchg(&nohz.load_balancer, cpu,
					   nr_cpu_ids) != cpu)
				BUG();

			if (test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))
				return;
		}

		cpumask_set_cpu(cpu, nohz.idle_cpus_mask);

		if (atomic_read(&nohz.first_pick_cpu) == cpu)
			atomic_cmpxchg(&nohz.first_pick_cpu, cpu, nr_cpu_ids);
		if (atomic_read(&nohz.second_pick_cpu) == cpu)
			atomic_cmpxchg(&nohz.second_pick_cpu, cpu, nr_cpu_ids);

		if (atomic_read(&nohz.load_balancer) >= nr_cpu_ids) {
			int new_ilb;

			/* make me the ilb owner */
			if (atomic_cmpxchg(&nohz.load_balancer, nr_cpu_ids,
					   cpu) != nr_cpu_ids)
				return;

			/*
			 * Check to see if there is a more power-efficient
			 * ilb.
			 */
			new_ilb = find_new_ilb(cpu);
			if (new_ilb < nr_cpu_ids && new_ilb != cpu) {
				atomic_set(&nohz.load_balancer, nr_cpu_ids);
				resched_cpu(new_ilb);
				return;
			}
			return;
		}
		atomic_inc(&nohz.nr_cpus);
		set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
	} else {
		if (!cpumask_test_cpu(cpu, nohz.idle_cpus_mask))
			return;

		cpumask_clear_cpu(cpu, nohz.idle_cpus_mask);

		if (atomic_read(&nohz.load_balancer) == cpu)
			if (atomic_cmpxchg(&nohz.load_balancer, cpu,
					   nr_cpu_ids) != cpu)
				BUG();
	}
	return;
}
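/*
 * Editor's note (illustrative, not part of this patch): the callers of
 * select_nohz_load_balancer() live in kernel/time/tick-sched.c. Roughly,
 * the tick code does
 *
 *	select_nohz_load_balancer(1);	when stopping the tick on idle entry
 *	select_nohz_load_balancer(0);	when restarting the tick
 *
 * With this patch the function only records idle-with-tick-stopped state;
 * the "cpu is busy again" cleanup (clearing NOHZ_TICK_STOPPED, removing the
 * cpu from idle_cpus_mask and decrementing nohz.nr_cpus) happens on the
 * first busy tick, in nohz_kick_needed() below.
 */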
@@ -5113,7 +5042,7 @@ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle)
		goto end;

	for_each_cpu(balance_cpu, nohz.idle_cpus_mask) {
		if (balance_cpu == this_cpu)
		if (balance_cpu == this_cpu || !idle_cpu(this_cpu))
			continue;

		/*
@@ -5141,22 +5070,18 @@ end:
}

/*
 * Current heuristic for kicking the idle load balancer
 * - first_pick_cpu is one of the busy CPUs. It will kick
 *   the idle load balancer when it has more than one process active. This
 *   eliminates the need for idle load balancing altogether when we have
 *   only one running process in the system (common case).
 * - If there is more than one busy CPU, the idle load balancer may have
 *   to run for active_load_balance to happen (i.e., two busy CPUs are
 *   SMT or core siblings and can run better if they move to different
 *   physical CPUs). So, second_pick_cpu is the second of the busy CPUs
 *   which will kick the idle load balancer as soon as it has any load.
 * Current heuristic for kicking the idle load balancer in the presence
 * of an idle cpu in the system:
 * - This rq has more than one task.
 * - At any scheduler domain level, this cpu's scheduler group has multiple
 *   busy cpus exceeding the group's power.
 * - For SD_ASYM_PACKING, if the lower numbered cpus in the scheduler
 *   domain span are idle.
 */
static inline int nohz_kick_needed(struct rq *rq, int cpu)
{
	unsigned long now = jiffies;
	int ret;
	int first_pick_cpu, second_pick_cpu;
	struct sched_domain *sd;

	if (unlikely(idle_cpu(cpu)))
		return 0;
@@ -5166,32 +5091,44 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu)
	 * busy tick after returning from idle, we will update the busy stats.
	 */
	set_cpu_sd_state_busy();
	if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu))))
	if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))) {
		clear_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
		cpumask_clear_cpu(cpu, nohz.idle_cpus_mask);
		atomic_dec(&nohz.nr_cpus);
	}

	/*
	 * None are in tickless mode and hence no need for NOHZ idle load
	 * balancing.
	 */
	if (likely(!atomic_read(&nohz.nr_cpus)))
		return 0;

	if (time_before(now, nohz.next_balance))
		return 0;

	first_pick_cpu = atomic_read(&nohz.first_pick_cpu);
	second_pick_cpu = atomic_read(&nohz.second_pick_cpu);
	if (rq->nr_running >= 2)
		goto need_kick;

	if (first_pick_cpu < nr_cpu_ids && first_pick_cpu != cpu &&
	    second_pick_cpu < nr_cpu_ids && second_pick_cpu != cpu)
		return 0;

	for_each_domain(cpu, sd) {
		struct sched_group *sg = sd->groups;
		struct sched_group_power *sgp = sg->sgp;
		int nr_busy = atomic_read(&sgp->nr_busy_cpus);

		ret = atomic_cmpxchg(&nohz.first_pick_cpu, nr_cpu_ids, cpu);
		if (ret == nr_cpu_ids || ret == cpu) {
			atomic_cmpxchg(&nohz.second_pick_cpu, cpu, nr_cpu_ids);
			if (rq->nr_running > 1)
				return 1;
		} else {
			ret = atomic_cmpxchg(&nohz.second_pick_cpu, nr_cpu_ids, cpu);
			if (ret == nr_cpu_ids || ret == cpu) {
				if (rq->nr_running)
					return 1;
			}

		if (sd->flags & SD_SHARE_PKG_RESOURCES && nr_busy > 1)
			goto need_kick;

		if (sd->flags & SD_ASYM_PACKING && nr_busy != sg->group_weight
		    && (cpumask_first_and(nohz.idle_cpus_mask,
					  sched_domain_span(sd)) < cpu))
			goto need_kick;

		if (!(sd->flags & (SD_SHARE_PKG_RESOURCES | SD_ASYM_PACKING)))
			break;
	}
	return 0;

need_kick:
	return 1;
}
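/*
 * Editor's illustrative sketch, not part of this patch: the heuristic above
 * is evaluated from the scheduler-tick path. Roughly, trigger_load_balance()
 * in this file does something like
 *
 *	if (nohz_kick_needed(rq, cpu) && likely(!on_null_domain(cpu)))
 *		nohz_balancer_kick(cpu);
 *
 * and the kicked cpu, on seeing NOHZ_BALANCE_KICK set in its nohz_flags,
 * runs nohz_idle_balance() from the SCHED_SOFTIRQ handler on behalf of all
 * cpus in nohz.idle_cpus_mask.
 */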
#else
static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle) { }
@@ -5652,9 +5589,6 @@ __init void init_sched_fair_class(void)
#ifdef CONFIG_NO_HZ
	zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT);
	alloc_cpumask_var(&nohz.grp_idle_mask, GFP_NOWAIT);
	atomic_set(&nohz.load_balancer, nr_cpu_ids);
	atomic_set(&nohz.first_pick_cpu, nr_cpu_ids);
	atomic_set(&nohz.second_pick_cpu, nr_cpu_ids);
#endif
#endif /* SMP */