From 4736a011ba189a4bb9b2fe97019f3c8227a35de7 Mon Sep 17 00:00:00 2001 From: Sultan Alsawaf Date: Fri, 5 May 2023 15:41:22 -0700 Subject: [PATCH] simple_lmk: Fix victim scheduling priority elevation As it turns out, victim scheduling priority elevation has always been broken for two reasons: 1. The minimum valid RT priority is 1, not 0. As a result, sched_setscheduler_nocheck() always fails with -EINVAL. 2. The thread within a victim thread group which happens to hold the mm is not necessarily the only thread with references to the mm, and isn't necessarily the thread which will release the final mm reference. As a result, victim threads which hold mm references may take a while to release them, and the unlucky thread which puts the final mm reference may take a very long time to release all memory if it doesn't have RT scheduling priority. These issues cause victims to often take a very long time to release their memory, possibly up to several seconds depending on system load. This, in turn, causes Simple LMK to constantly hit the reclaim timeout and kill more processes, with Simple LMK being rather ineffective since victims may not release any memory for several seconds. Fix the broken scheduling priority elevation by changing the RT priority to the valid lowest priority of 1 and applying it to all threads in the thread group, instead of just the thread which holds the mm. Signed-off-by: Sultan Alsawaf --- drivers/android/simple_lmk.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/android/simple_lmk.c b/drivers/android/simple_lmk.c index 7afd721845bf..1200de5386b7 100644 --- a/drivers/android/simple_lmk.c +++ b/drivers/android/simple_lmk.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (C) 2019-2021 Sultan Alsawaf . + * Copyright (C) 2019-2023 Sultan Alsawaf . */ #define pr_fmt(fmt) "simple_lmk: " fmt @@ -225,7 +225,9 @@ static void scan_and_kill(void) /* Kill the victims */ for (i = 0; i < nr_to_kill; i++) { - static const struct sched_param sched_zero_prio; + static const struct sched_param min_rt_prio = { + .sched_priority = 1 + }; struct victim_info *victim = &victims[i]; struct task_struct *t, *vtsk = victim->tsk; @@ -236,15 +238,23 @@ static void scan_and_kill(void) /* Accelerate the victim's death by forcing the kill signal */ do_send_sig_info(SIGKILL, SEND_SIG_FORCED, vtsk, true); - /* Mark the thread group dead so that other kernel code knows */ + /* + * Mark the thread group dead so that other kernel code knows, + * and then elevate the thread group to SCHED_RR with minimum RT + * priority. The entire group needs to be elevated because + * there's no telling which threads have references to the mm as + * well as which thread will happen to put the final reference + * and release the mm's memory. If the mm is released from a + * thread with low scheduling priority then it may take a very + * long time for exit_mmap() to complete. + */ rcu_read_lock(); for_each_thread(vtsk, t) set_tsk_thread_flag(t, TIF_MEMDIE); + for_each_thread(vtsk, t) + sched_setscheduler_nocheck(t, SCHED_RR, &min_rt_prio); rcu_read_unlock(); - /* Elevate the victim to SCHED_RR with zero RT priority */ - sched_setscheduler_nocheck(vtsk, SCHED_RR, &sched_zero_prio); - /* Allow the victim to run on any CPU. This won't schedule. */ set_cpus_allowed_ptr(vtsk, cpu_all_mask);