perf: Manage CPU hotplug events at core level

Currently, perf's hotplug management is split between the core and
the PMU drivers: each PMU driver is responsible for starting and
stopping its perf-event counters when a CPU comes up or goes down,
respectively. This approach does not scale with the growing number
of PMU drivers. Moreover, because the perf core only performs a
partial cleanup, perf events are prone to race conditions, for
example between the CPU hotplug teardown callback
(perf_event_exit_cpu()) and the hrtimer handler
(perf_mux_hrtimer_handler()) used for multiplexing events.

Hence, move the hotplug management into the perf core. The idea is
to detach an event from its context (perf_remove_from_context())
when its CPU is about to go down, and to re-attach it
(perf_install_in_context()) when the CPU comes back online.
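
In outline, the two hotplug paths in the perf core then look as
follows (a condensed sketch abridged from the kernel/events/core.c
hunks below; the per-pmu iteration, locking and cpuctx bookkeeping
are elided):

    /* CPU teardown: detach every CPU-bound event and park it. */
    static void perf_event_exit_cpu_context(int cpu)
    {
        per_cpu(is_hotplugging, cpu) = true;
        /* ... for each pmu's CPU context on this CPU ... */
        list_for_each_entry_safe(event, event_tmp, &ctx->event_list,
                                 event_entry) {
            perf_remove_from_context(event, DETACH_GROUP);
            if (event->pmu->events_across_hotplug)
                perf_prepare_install_in_context(event);
        }
        /* ... */
    }

    /* CPU startup: re-install everything parked for this CPU. */
    int perf_event_restart_events(unsigned int cpu)
    {
        mutex_lock(&pmus_lock);
        per_cpu(is_hotplugging, cpu) = false;
        perf_deferred_install_in_context(cpu);
        mutex_unlock(&pmus_lock);

        return 0;
    }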

This also removes the logic for maintaining zombie events
(PERF_EVENT_STATE_ZOMBIE) and lets the dormant list itself carry
the events whose CPUs are offline.
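
The dormant-list bookkeeping that replaces the zombie list then
reduces to roughly the following (again abridged from the hunks
below):

    /* Queue an event for re-installation once its CPU returns. */
    static void perf_prepare_install_in_context(struct perf_event *event)
    {
        spin_lock(&dormant_event_list_lock);
        if (event->state == PERF_EVENT_STATE_DORMANT)
            goto out;   /* already parked, don't queue it twice */

        event->state = PERF_EVENT_STATE_DORMANT;
        list_add_tail(&event->dormant_event_entry, &dormant_event_list);
    out:
        spin_unlock(&dormant_event_list_lock);
    }

If the event is released while its CPU is still offline,
__perf_event_release_kernel() simply unlinks it from the dormant
list, so no separate zombie state is needed.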

Change-Id: I39503d1f4b0b3937920b666a4f04875162fad3b3
Signed-off-by: Raghavendra Rao Ananta <rananta@codeaurora.org>
Branch: tirimbino
Author: Raghavendra Rao Ananta
Commit: 77257e46ef (parent: 292276ff0d)

 arch/arm64/kernel/perf_event.c |   4
 drivers/perf/arm_pmu.c         |  14
 include/linux/perf_event.h     |  11
 kernel/cpu.c                   |   2
 kernel/events/core.c           | 144
 5 files changed, 175 lines

--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -748,8 +748,8 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev)
         struct perf_event *event = cpuc->events[idx];
         struct hw_perf_event *hwc;
 
-        /* Ignore if we don't have an event or if it's a zombie event */
-        if (!event || event->state == PERF_EVENT_STATE_ZOMBIE)
+        /* Ignore if we don't have an event */
+        if (!event || event->state != PERF_EVENT_STATE_ACTIVE)
             continue;
 
         /*

--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -662,11 +662,7 @@ static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd)
         if (!event)
             continue;
 
-        /*
-         * Check if an attempt was made to free this event during
-         * the CPU went offline.
-         */
-        if (event->state == PERF_EVENT_STATE_ZOMBIE)
+        if (event->state != PERF_EVENT_STATE_ACTIVE)
             continue;
 
         switch (cmd) {
@@ -792,10 +788,8 @@ static int arm_perf_starting_cpu(unsigned int cpu, struct hlist_node *node)
     if (!pmu || !cpumask_test_cpu(cpu, &pmu->supported_cpus))
         return 0;
 
-    data.cmd = CPU_PM_EXIT;
-    cpu_pm_pmu_common(&data);
-    if (data.ret == NOTIFY_DONE)
-        return 0;
+    if (pmu->reset)
+        pmu->reset(pmu);
 
     if (data.armpmu->pmu_state != ARM_PMU_STATE_OFF &&
         data.armpmu->plat_device) {
@@ -821,8 +815,6 @@ static int arm_perf_stopping_cpu(unsigned int cpu, struct hlist_node *node)
     if (!pmu || !cpumask_test_cpu(cpu, &pmu->supported_cpus))
         return 0;
 
-    data.cmd = CPU_PM_ENTER;
-    cpu_pm_pmu_common(&data);
     /* Disarm the PMU IRQ before disappearing. */
     if (data.armpmu->pmu_state == ARM_PMU_STATE_RUNNING &&
         data.armpmu->plat_device) {

--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -490,9 +490,8 @@ struct perf_addr_filters_head {
  * enum perf_event_active_state - the states of a event
  */
 enum perf_event_active_state {
-    PERF_EVENT_STATE_DORMANT    = -6,
-    PERF_EVENT_STATE_DEAD       = -5,
-    PERF_EVENT_STATE_ZOMBIE     = -4,
+    PERF_EVENT_STATE_DORMANT    = -5,
+    PERF_EVENT_STATE_DEAD       = -4,
     PERF_EVENT_STATE_EXIT       = -3,
     PERF_EVENT_STATE_ERROR      = -2,
     PERF_EVENT_STATE_OFF        = -1,
@@ -715,8 +714,6 @@ struct perf_event {
     struct list_head            sb_list;
 
-    struct list_head            zombie_entry;
-
     /*
      * Entry into the list that holds the events whose CPUs
      * are offline. These events will be installed once the
@@ -1412,11 +1409,11 @@ static struct device_attribute format_attr_##_name = __ATTR_RO(_name)
 #ifdef CONFIG_PERF_EVENTS
 int perf_event_init_cpu(unsigned int cpu);
 int perf_event_exit_cpu(unsigned int cpu);
-int perf_event_start_swevents(unsigned int cpu);
+int perf_event_restart_events(unsigned int cpu);
 #else
 #define perf_event_init_cpu     NULL
 #define perf_event_exit_cpu     NULL
-#define perf_event_start_swevents   NULL
+#define perf_event_restart_events   NULL
 #endif
 
 #endif /* _LINUX_PERF_EVENT_H */

--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1515,7 +1515,7 @@ static struct cpuhp_step cpuhp_ap_states[] = {
     },
     [CPUHP_AP_PERF_ONLINE] = {
         .name               = "perf:online",
-        .startup.single     = perf_event_start_swevents,
+        .startup.single     = perf_event_restart_events,
         .teardown.single    = perf_event_exit_cpu,
     },
     [CPUHP_AP_WORKQUEUE_ONLINE] = {

--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2428,8 +2428,12 @@ static DEFINE_SPINLOCK(dormant_event_list_lock);
 static void perf_prepare_install_in_context(struct perf_event *event)
 {
     spin_lock(&dormant_event_list_lock);
+    if (event->state == PERF_EVENT_STATE_DORMANT)
+        goto out;
+
     event->state = PERF_EVENT_STATE_DORMANT;
     list_add_tail(&event->dormant_event_entry, &dormant_event_list);
+out:
     spin_unlock(&dormant_event_list_lock);
 }
 #endif
@@ -2524,7 +2528,7 @@ perf_install_in_context(struct perf_event_context *ctx,
     if (!task) {
 #if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
-        if (!cpu_online(cpu)) {
+        if (per_cpu(is_hotplugging, cpu)) {
             perf_prepare_install_in_context(event);
             return;
         }
@@ -2614,7 +2618,6 @@ static void perf_deferred_install_in_context(int cpu)
         spin_unlock(&dormant_event_list_lock);
 
         ctx = event->ctx;
-        perf_event__state_init(event);
 
         mutex_lock(&ctx->mutex);
         perf_install_in_context(ctx, event, cpu);
@@ -4473,14 +4476,6 @@ static void put_event(struct perf_event *event)
     _free_event(event);
 }
 
-/*
- * Maintain a zombie list to collect all the zombie events
- */
-#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
-static LIST_HEAD(zombie_list);
-static DEFINE_SPINLOCK(zombie_list_lock);
-#endif
-
 /*
  * Kill an event dead; while event:refcount will preserve the event
  * object, it will not preserve its functionality. Once the last 'user'
@@ -4491,29 +4486,13 @@ static int __perf_event_release_kernel(struct perf_event *event)
     struct perf_event_context *ctx = event->ctx;
     struct perf_event *child, *tmp;
 
-    /*
-     * If the cpu associated to this event is offline, set the event as a
-     * zombie event. The cleanup of the cpu would be done if the CPU is
-     * back online.
-     */
 #if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
-    if (event->state == PERF_EVENT_STATE_ZOMBIE)
-        return 0;
-
-    if (event->cpu != -1 && per_cpu(is_hotplugging, event->cpu)) {
-        event->state = PERF_EVENT_STATE_ZOMBIE;
-        spin_lock(&zombie_list_lock);
-        list_add_tail(&event->zombie_entry, &zombie_list);
-        spin_unlock(&zombie_list_lock);
-        return 0;
+    if (event->cpu != -1) {
+        spin_lock(&dormant_event_list_lock);
+        if (event->state == PERF_EVENT_STATE_DORMANT)
+            list_del(&event->dormant_event_entry);
+        spin_unlock(&dormant_event_list_lock);
     }
-
-    spin_lock(&dormant_event_list_lock);
-    if (event->state == PERF_EVENT_STATE_DORMANT)
-        list_del(&event->dormant_event_entry);
-    spin_unlock(&dormant_event_list_lock);
 #endif
 
     /*
/* /*
@@ -9864,7 +9843,6 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
     INIT_LIST_HEAD(&event->rb_entry);
     INIT_LIST_HEAD(&event->active_entry);
     INIT_LIST_HEAD(&event->addr_filters.list);
-    INIT_LIST_HEAD(&event->zombie_entry);
 
     INIT_HLIST_NODE(&event->hlist_entry);
@@ -11539,116 +11517,35 @@ void perf_swevent_init_cpu(unsigned int cpu)
 }
 
 #if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
-static void
-check_hotplug_start_event(struct perf_event *event)
+int perf_event_restart_events(unsigned int cpu)
 {
-    if (event->pmu->events_across_hotplug &&
-        event->attr.type == PERF_TYPE_SOFTWARE &&
-        event->pmu->start)
-        event->pmu->start(event, 0);
-}
-
-static void perf_event_zombie_cleanup(unsigned int cpu)
-{
-    struct perf_event *event, *tmp;
-
-    spin_lock(&zombie_list_lock);
-    list_for_each_entry_safe(event, tmp, &zombie_list, zombie_entry) {
-        if (event->cpu != cpu)
-            continue;
-
-        list_del(&event->zombie_entry);
-        spin_unlock(&zombie_list_lock);
-
-        /*
-         * The detachment of the event with the
-         * PMU expects it to be in an active state
-         */
-        event->state = PERF_EVENT_STATE_ACTIVE;
-        __perf_event_release_kernel(event);
-
-        spin_lock(&zombie_list_lock);
-    }
-    spin_unlock(&zombie_list_lock);
-}
-
-int perf_event_start_swevents(unsigned int cpu)
-{
-    struct perf_event_context *ctx;
-    struct pmu *pmu;
-    struct perf_event *event;
-    int idx;
-
     mutex_lock(&pmus_lock);
-    perf_event_zombie_cleanup(cpu);
-    perf_deferred_install_in_context(cpu);
-
-    idx = srcu_read_lock(&pmus_srcu);
-    list_for_each_entry_rcu(pmu, &pmus, entry) {
-        ctx = &per_cpu_ptr(pmu->pmu_cpu_context, cpu)->ctx;
-
-        mutex_lock(&ctx->mutex);
-        raw_spin_lock(&ctx->lock);
-        list_for_each_entry(event, &ctx->event_list, event_entry)
-            check_hotplug_start_event(event);
-        raw_spin_unlock(&ctx->lock);
-        mutex_unlock(&ctx->mutex);
-    }
-    srcu_read_unlock(&pmus_srcu, idx);
-
     per_cpu(is_hotplugging, cpu) = false;
+    perf_deferred_install_in_context(cpu);
     mutex_unlock(&pmus_lock);
 
     return 0;
 }
 
-/*
- * If keeping events across hotplugging is supported, do not
- * remove the event list so event lives beyond CPU hotplug.
- * The context is exited via an fd close path when userspace
- * is done and the target CPU is online. If software clock
- * event is active, then stop hrtimer associated with it.
- * Start the timer when the CPU comes back online.
- */
-static void
-check_hotplug_remove_from_context(struct perf_event *event,
-                  struct perf_cpu_context *cpuctx,
-                  struct perf_event_context *ctx)
-{
-    if (event->pmu->events_across_hotplug &&
-        event->attr.type == PERF_TYPE_SOFTWARE &&
-        event->pmu->stop)
-        event->pmu->stop(event, PERF_EF_UPDATE);
-    else if (!event->pmu->events_across_hotplug)
-        __perf_remove_from_context(event, cpuctx,
-                       ctx, (void *)DETACH_GROUP);
-}
-
-static void __perf_event_exit_context(void *__info)
-{
-    struct perf_event_context *ctx = __info;
-    struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
-    struct perf_event *event;
-
-    raw_spin_lock(&ctx->lock);
-    list_for_each_entry(event, &ctx->event_list, event_entry)
-        check_hotplug_remove_from_context(event, cpuctx, ctx);
-    raw_spin_unlock(&ctx->lock);
-}
-
 static void perf_event_exit_cpu_context(int cpu)
 {
     struct perf_cpu_context *cpuctx;
     struct perf_event_context *ctx;
+    struct perf_event *event, *event_tmp;
     struct pmu *pmu;
 
     mutex_lock(&pmus_lock);
+    per_cpu(is_hotplugging, cpu) = true;
     list_for_each_entry(pmu, &pmus, entry) {
         cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
         ctx = &cpuctx->ctx;
 
         mutex_lock(&ctx->mutex);
-        smp_call_function_single(cpu, __perf_event_exit_context, ctx, 1);
+        list_for_each_entry_safe(event, event_tmp, &ctx->event_list,
+                     event_entry) {
+            perf_remove_from_context(event, DETACH_GROUP);
+            if (event->pmu->events_across_hotplug)
+                perf_prepare_install_in_context(event);
+        }
         cpuctx->online = 0;
         mutex_unlock(&ctx->mutex);
     }
@@ -11686,7 +11583,6 @@ int perf_event_init_cpu(unsigned int cpu)
 int perf_event_exit_cpu(unsigned int cpu)
 {
-    per_cpu(is_hotplugging, cpu) = true;
     perf_event_exit_cpu_context(cpu);
     return 0;
 }
