diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h
index 6503dafb8e7c..f9af04345897 100755
--- a/include/linux/vmpressure.h
+++ b/include/linux/vmpressure.h
@@ -27,6 +27,9 @@ struct vmpressure {
 	struct mutex events_lock;
 
 	struct work_struct work;
+
+	atomic_long_t users;
+	rwlock_t users_lock;
 };
 
 struct mem_cgroup;
@@ -38,6 +41,8 @@ extern void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
 		       int order);
 extern void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio,
 			    int order);
+extern bool vmpressure_inc_users(int order);
+extern void vmpressure_dec_users(void);
 
 #ifdef CONFIG_MEMCG
 extern void vmpressure_init(struct vmpressure *vmpr);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 51de840d394d..564d0d66986b 100755
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4090,6 +4090,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 
 	task_cputime(current, &utime, &stime_s);
 	bool woke_kswapd = false;
+	bool used_vmpressure = false;
 
 	/*
 	 * We also sanity check to catch abuse of atomic reserves being used by
@@ -4128,6 +4129,8 @@ retry_cpuset:
 			atomic_long_inc(&kswapd_waiters);
 			woke_kswapd = true;
 		}
+		if (!used_vmpressure)
+			used_vmpressure = vmpressure_inc_users(order);
 		wake_all_kswapds(order, ac);
 	}
 
@@ -4222,6 +4225,8 @@ retry:
 		goto nopage;
 
 	/* Try direct reclaim and then allocating */
+	if (!used_vmpressure)
+		used_vmpressure = vmpressure_inc_users(order);
 	page = __alloc_pages_direct_reclaim(gfp_mask, order, alloc_flags, ac,
 							&did_some_progress);
 	pages_reclaimed += did_some_progress;
@@ -4357,6 +4362,8 @@ got_pg:
 	}
 	if (woke_kswapd)
 		atomic_long_dec(&kswapd_waiters);
+	if (used_vmpressure)
+		vmpressure_dec_users();
 	if (!page)
 		warn_alloc(gfp_mask, ac->nodemask,
 				"page allocation failure: order:%u", order);
diff --git a/mm/vmpressure.c b/mm/vmpressure.c
index 50f4134e65cf..8d6eeacaf3ad 100755
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c
@@ -217,11 +217,12 @@ static void vmpressure_work_fn(struct work_struct *work)
 	unsigned long scanned;
 	unsigned long reclaimed;
 	unsigned long pressure;
+	unsigned long flags;
 	enum vmpressure_levels level;
 	bool ancestor = false;
 	bool signalled = false;
 
-	spin_lock(&vmpr->sr_lock);
+	spin_lock_irqsave(&vmpr->sr_lock, flags);
 	/*
 	 * Several contexts might be calling vmpressure(), so it is
	 * possible that the work was rescheduled again before the old
@@ -232,14 +233,14 @@ static void vmpressure_work_fn(struct work_struct *work)
 	 */
 	scanned = vmpr->tree_scanned;
 	if (!scanned) {
-		spin_unlock(&vmpr->sr_lock);
+		spin_unlock_irqrestore(&vmpr->sr_lock, flags);
 		return;
 	}
 
 	reclaimed = vmpr->tree_reclaimed;
 	vmpr->tree_scanned = 0;
 	vmpr->tree_reclaimed = 0;
-	spin_unlock(&vmpr->sr_lock);
+	spin_unlock_irqrestore(&vmpr->sr_lock, flags);
 
 	pressure = vmpressure_calc_pressure(scanned, reclaimed);
 	level = vmpressure_level(pressure);
@@ -280,6 +281,7 @@ static void vmpressure_memcg(gfp_t gfp, struct mem_cgroup *memcg, bool critical,
 			     unsigned long reclaimed)
 {
 	struct vmpressure *vmpr = memcg_to_vmpressure(memcg);
+	unsigned long flags;
 
 	/*
 	 * If we got here with no pages scanned, then that is an indicator
@@ -295,10 +297,10 @@ static void vmpressure_memcg(gfp_t gfp, struct mem_cgroup *memcg, bool critical,
 		return;
 
 	if (tree) {
-		spin_lock(&vmpr->sr_lock);
+		spin_lock_irqsave(&vmpr->sr_lock, flags);
 		scanned = vmpr->tree_scanned += scanned;
 		vmpr->tree_reclaimed += reclaimed;
-		spin_unlock(&vmpr->sr_lock);
+		spin_unlock_irqrestore(&vmpr->sr_lock, flags);
 
 		if (!critical && scanned < calculate_vmpressure_win())
 			return;
@@ -311,15 +313,15 @@ static void vmpressure_memcg(gfp_t gfp, struct mem_cgroup *memcg, bool critical,
 		if (!memcg || memcg == root_mem_cgroup)
 			return;
 
-		spin_lock(&vmpr->sr_lock);
+		spin_lock_irqsave(&vmpr->sr_lock, flags);
 		scanned = vmpr->scanned += scanned;
 		reclaimed = vmpr->reclaimed += reclaimed;
 		if (!critical && scanned < calculate_vmpressure_win()) {
-			spin_unlock(&vmpr->sr_lock);
+			spin_unlock_irqrestore(&vmpr->sr_lock, flags);
 			return;
 		}
 		vmpr->scanned = vmpr->reclaimed = 0;
-		spin_unlock(&vmpr->sr_lock);
+		spin_unlock_irqrestore(&vmpr->sr_lock, flags);
 
 		pressure = vmpressure_calc_pressure(scanned, reclaimed);
 		level = vmpressure_level(pressure);
@@ -343,17 +345,49 @@ static void vmpressure_memcg(gfp_t gfp, struct mem_cgroup *memcg, bool critical,
 			     unsigned long reclaimed) { }
 #endif
 
+bool vmpressure_inc_users(int order)
+{
+	struct vmpressure *vmpr = &global_vmpressure;
+	unsigned long flags;
+
+	if (order > PAGE_ALLOC_COSTLY_ORDER)
+		return false;
+
+	write_lock_irqsave(&vmpr->users_lock, flags);
+	if (atomic_long_inc_return_relaxed(&vmpr->users) == 1) {
+		/* Clear out stale vmpressure data when reclaim begins */
+		spin_lock(&vmpr->sr_lock);
+		vmpr->scanned = 0;
+		vmpr->reclaimed = 0;
+		vmpr->stall = 0;
+		spin_unlock(&vmpr->sr_lock);
+	}
+	write_unlock_irqrestore(&vmpr->users_lock, flags);
+
+	return true;
+}
+
+void vmpressure_dec_users(void)
+{
+	struct vmpressure *vmpr = &global_vmpressure;
+
+	/* Decrement the vmpressure user count with release semantics */
+	smp_mb__before_atomic();
+	atomic_long_dec(&vmpr->users);
+}
+
 static void vmpressure_global(gfp_t gfp, unsigned long scanned, bool critical,
 			      unsigned long reclaimed)
 {
 	struct vmpressure *vmpr = &global_vmpressure;
 	unsigned long pressure;
 	unsigned long stall;
+	unsigned long flags;
 
 	if (critical)
 		scanned = calculate_vmpressure_win();
 
-	spin_lock(&vmpr->sr_lock);
+	spin_lock_irqsave(&vmpr->sr_lock, flags);
 	if (scanned) {
 		vmpr->scanned += scanned;
 		vmpr->reclaimed += reclaimed;
@@ -366,14 +400,14 @@ static void vmpressure_global(gfp_t gfp, unsigned long scanned, bool critical,
 		reclaimed = vmpr->reclaimed;
 
 		if (!critical && scanned < calculate_vmpressure_win()) {
-			spin_unlock(&vmpr->sr_lock);
+			spin_unlock_irqrestore(&vmpr->sr_lock, flags);
 			return;
 		}
 	}
 	vmpr->scanned = 0;
 	vmpr->reclaimed = 0;
 	vmpr->stall = 0;
-	spin_unlock(&vmpr->sr_lock);
+	spin_unlock_irqrestore(&vmpr->sr_lock, flags);
 
 	if (scanned) {
 		pressure = vmpressure_calc_pressure(scanned, reclaimed);
@@ -419,9 +453,25 @@ static void __vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool critical,
 void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
 		unsigned long scanned, unsigned long reclaimed, int order)
 {
+	struct vmpressure *vmpr = &global_vmpressure;
+	unsigned long flags;
+
 	if (order > PAGE_ALLOC_COSTLY_ORDER)
 		return;
 
+	/*
+	 * It's possible for kswapd to keep doing reclaim even though memory
+	 * pressure isn't high anymore. We should only track vmpressure when
+	 * there are failed memory allocations actively stuck in the page
+	 * allocator's slow path. No failed allocations means pressure is fine.
+	 */
+	read_lock_irqsave(&vmpr->users_lock, flags);
+	if (!atomic_long_read(&vmpr->users)) {
+		read_unlock_irqrestore(&vmpr->users_lock, flags);
+		return;
+	}
+	read_unlock_irqrestore(&vmpr->users_lock, flags);
+
 	__vmpressure(gfp, memcg, false, tree, scanned, reclaimed);
 }
 
@@ -590,6 +640,8 @@ void vmpressure_init(struct vmpressure *vmpr)
 	mutex_init(&vmpr->events_lock);
 	INIT_LIST_HEAD(&vmpr->events);
 	INIT_WORK(&vmpr->work, vmpressure_work_fn);
+	atomic_long_set(&vmpr->users, 0);
+	rwlock_init(&vmpr->users_lock);
 }
 
 /**