perf: Add support for user and kernel event sharing

The ARM PMU has only a limited number of counters, yet the PMU driver
allocates a new counter even when an equivalent event is already being
counted. To avoid wasting counters, events configured to count the same
thing should share a single counter. Until now such sharing was only
possible for kernel clients, not for user-space clients. Extend it so
that a counter can be shared between user-space clients only, between
kernel clients only, or between user-space and kernel clients. For
events to share a counter, the kernel's and the user's attr->type
(hardware/raw) and attr->config must be the same.
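
To illustrate the sharing criteria from the user-space side (a minimal
sketch, not part of this patch; the helper name is made up): a per-CPU,
non-sampling event whose attr->type and attr->config match an
already-counting event is eligible to be backed by the same PMU counter
when CONFIG_PERF_USER_SHARE is enabled.

/* Illustrative user-space sketch only. */
#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int open_shareable_cycles(int cpu)
{
        struct perf_event_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_HARDWARE;                 /* must match to share */
        attr.config = PERF_COUNT_HW_CPU_CYCLES;         /* must match to share */
        /* No sample_period: sampling events are not shared. */

        /* pid == -1, cpu >= 0: only per-CPU (not per-task) events are shared. */
        return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
}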

Change-Id: I4a4b35bde6beaf8f2aef74e683a9804e31807013
Signed-off-by: Raghavendra Rao Ananta <rananta@codeaurora.org>
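
On the kernel side, a client that creates a matching per-CPU counter
through perf_event_create_kernel_counter() can end up sharing the same
event (again a sketch under the same assumptions, not part of this
patch):

/* Illustrative kernel-client sketch only. */
#include <linux/perf_event.h>

static struct perf_event *create_shareable_cycles(int cpu)
{
        struct perf_event_attr attr = {
                .size   = sizeof(attr),
                .type   = PERF_TYPE_HARDWARE,           /* must match to share */
                .config = PERF_COUNT_HW_CPU_CYCLES,     /* must match to share */
        };

        /*
         * No task, no overflow handler, no sampling: with this change, if
         * an identical per-CPU event already exists in the shared list,
         * the existing event is returned instead of allocating a new
         * counter.
         */
        return perf_event_create_kernel_counter(&attr, cpu, NULL, NULL, NULL);
}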
parent ef790030bb
commit f0453c73e1
 include/linux/perf_event.h |   3
 init/Kconfig               |  10
 kernel/events/core.c       | 336

@@ -716,6 +716,9 @@ struct perf_event {
* CPU wakes up and will be removed from the list after that
*/
struct list_head dormant_event_entry;
/* Is this event shared with other events */
bool shared;
#endif /* CONFIG_PERF_EVENTS */
};

@@ -1522,6 +1522,16 @@ config PERF_EVENTS
Say Y if unsure.
config PERF_USER_SHARE
bool "Perf event sharing with user-space"
help
Say Y here to enable sharing of events with user-space. An event can
be shared with other user-space events or with kernel-created events
that have the same config and type attributes.
Say N if unsure.
config DEBUG_PERF_USE_VMALLOC
default n
bool "Debug: use vmalloc to back perf mmap() buffers"

@@ -1770,6 +1770,10 @@ static void perf_group_detach(struct perf_event *event)
if (event->group_leader != event) {
list_del_init(&event->group_entry);
event->group_leader->nr_siblings--;
if (event->shared)
event->group_leader = event;
goto out;
}
@@ -4462,15 +4466,23 @@ int perf_event_release_kernel(struct perf_event *event)
if (!is_kernel_event(event)) {
perf_remove_from_owner(event);
} else {
if (perf_event_delete_kernel_shared(event) > 0)
return 0;
}
ctx = perf_event_ctx_lock(event);
WARN_ON_ONCE(ctx->parent_ctx);
perf_remove_from_context(event, DETACH_GROUP);
if (perf_event_delete_kernel_shared(event) > 0) {
perf_event__state_init(event);
perf_install_in_context(ctx, event, event->cpu);
perf_event_ctx_unlock(event, ctx);
perf_event_enable(event);
return 0;
}
raw_spin_lock_irq(&ctx->lock);
/*
* Mark this event as STATE_DEAD, there is no external reference to it
@@ -9614,6 +9626,122 @@ enabled:
account_pmu_sb_event(event);
}
static struct perf_event *
perf_event_create_kernel_shared_check(struct perf_event_attr *attr, int cpu,
struct task_struct *task,
perf_overflow_handler_t overflow_handler,
struct perf_event *group_leader)
{
unsigned long idx;
struct perf_event *event;
struct shared_events_str *shrd_events;
/*
* Have to be per cpu events for sharing
*/
if (!shared_events || (u32)cpu >= nr_cpu_ids)
return NULL;
/*
* Can't handle these types of requests for sharing right now.
*/
if (task || overflow_handler || attr->sample_period ||
(attr->type != PERF_TYPE_HARDWARE &&
attr->type != PERF_TYPE_RAW)) {
return NULL;
}
/*
* Using per_cpu_ptr() (or we could do a cross-CPU call, which is what
* most of perf does to access per-CPU data structures).
*/
shrd_events = per_cpu_ptr(shared_events, cpu);
mutex_lock(&shrd_events->list_mutex);
event = NULL;
for_each_set_bit(idx, shrd_events->used_mask, SHARED_EVENTS_MAX) {
/*
* Compare the attr structures field by field rather than with
* memcmp(): user-space and the kernel might be built against
* different versions of perf, so the field offsets and the overall
* structure size are not guaranteed to match.
*/
if (attr->type == shrd_events->attr[idx].type &&
attr->config == shrd_events->attr[idx].config) {
event = shrd_events->events[idx];
/* Do not change the group for this shared event */
if (group_leader && event->group_leader != event) {
event = NULL;
continue;
}
event->shared = true;
atomic_inc(&shrd_events->refcount[idx]);
break;
}
}
mutex_unlock(&shrd_events->list_mutex);
return event;
}
static void
perf_event_create_kernel_shared_add(struct perf_event_attr *attr, int cpu,
struct task_struct *task,
perf_overflow_handler_t overflow_handler,
void *context,
struct perf_event *event)
{
unsigned long idx;
struct shared_events_str *shrd_events;
/*
* Have to be per cpu events for sharing
*/
if (!shared_events || (u32)cpu >= nr_cpu_ids)
return;
/*
* Can't handle these types of requests for sharing right now.
*/
if (overflow_handler || attr->sample_period ||
(attr->type != PERF_TYPE_HARDWARE &&
attr->type != PERF_TYPE_RAW)) {
return;
}
/*
* Using per_cpu_ptr() (or we could do a cross-CPU call, which is what
* most of perf does to access per-CPU data structures).
*/
shrd_events = per_cpu_ptr(shared_events, cpu);
mutex_lock(&shrd_events->list_mutex);
/*
* If we are in this routine, we know that this event isn't already in
* the shared list. Check whether a slot is available in the shared list.
*/
idx = find_first_zero_bit(shrd_events->used_mask, SHARED_EVENTS_MAX);
if (idx >= SHARED_EVENTS_MAX)
goto out;
/*
* The event isn't in the list and there is an empty slot so add it.
*/
shrd_events->attr[idx] = *attr;
shrd_events->events[idx] = event;
set_bit(idx, shrd_events->used_mask);
atomic_set(&shrd_events->refcount[idx], 1);
out:
mutex_unlock(&shrd_events->list_mutex);
}
/*
* Allocate and initialize an event structure
*/
@@ -10084,6 +10212,31 @@ again:
return gctx;
}
#ifdef CONFIG_PERF_USER_SHARE
static void perf_group_shared_event(struct perf_event *event,
struct perf_event *group_leader)
{
if (!event->shared || !group_leader)
return;
/* Do not attempt to change the group for this shared event */
if (event->group_leader != event)
return;
/*
* A standalone event is its own group leader. Now that there is a
* new group to attach to, remove the event from its previous context
* and attach it to the new group.
*/
perf_remove_from_context(event, DETACH_GROUP);
event->group_leader = group_leader;
perf_event__state_init(event);
perf_install_in_context(group_leader->ctx, event, event->cpu);
}
#endif
/**
* sys_perf_event_open - open a performance event, associate it to a task/cpu
*
@@ -10097,7 +10250,7 @@ SYSCALL_DEFINE5(perf_event_open,
pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
{
struct perf_event *group_leader = NULL, *output_event = NULL;
struct perf_event *event, *sibling;
struct perf_event *event = NULL, *sibling;
struct perf_event_attr attr;
struct perf_event_context *ctx, *uninitialized_var(gctx);
struct file *event_file = NULL;
@@ -10209,11 +10362,17 @@ SYSCALL_DEFINE5(perf_event_open,
if (flags & PERF_FLAG_PID_CGROUP)
cgroup_fd = pid;
event = perf_event_alloc(&attr, cpu, task, group_leader, NULL,
NULL, NULL, cgroup_fd);
if (IS_ERR(event)) {
err = PTR_ERR(event);
goto err_cred;
#ifdef CONFIG_PERF_USER_SHARE
event = perf_event_create_kernel_shared_check(&attr, cpu, task, NULL,
group_leader);
#endif
if (!event) {
event = perf_event_alloc(&attr, cpu, task, group_leader, NULL,
NULL, NULL, cgroup_fd);
if (IS_ERR(event)) {
err = PTR_ERR(event);
goto err_cred;
}
}
if (is_sampling_event(event)) {
@@ -10380,7 +10539,7 @@ SYSCALL_DEFINE5(perf_event_open,
* Must be under the same ctx::mutex as perf_install_in_context(),
* because we need to serialize with concurrent event creation.
*/
if (!exclusive_event_installable(event, ctx)) {
if (!event->shared && !exclusive_event_installable(event, ctx)) {
/* exclusive and group stuff are assumed mutually exclusive */
WARN_ON_ONCE(move_group);
@@ -10451,10 +10610,17 @@ SYSCALL_DEFINE5(perf_event_open,
perf_event__header_size(event);
perf_event__id_header_size(event);
event->owner = current;
#ifdef CONFIG_PERF_USER_SHARE
if (event->shared && group_leader)
perf_group_shared_event(event, group_leader);
#endif
if (!event->shared) {
event->owner = current;
perf_install_in_context(ctx, event, event->cpu);
perf_unpin_context(ctx);
perf_install_in_context(ctx, event, event->cpu);
perf_unpin_context(ctx);
}
if (move_group)
perf_event_ctx_unlock(group_leader, gctx);
@@ -10465,9 +10631,11 @@ SYSCALL_DEFINE5(perf_event_open,
put_task_struct(task);
}
mutex_lock(&current->perf_event_mutex);
list_add_tail(&event->owner_entry, &current->perf_event_list);
mutex_unlock(&current->perf_event_mutex);
if (!event->shared) {
mutex_lock(&current->perf_event_mutex);
list_add_tail(&event->owner_entry, &current->perf_event_list);
mutex_unlock(&current->perf_event_mutex);
}
/*
* Drop the reference on the group_event after placing the
@@ -10477,6 +10645,14 @@
*/
fdput(group);
fd_install(event_fd, event_file);
#ifdef CONFIG_PERF_USER_SHARE
/* Add the event to the shared events list */
if (!event->shared)
perf_event_create_kernel_shared_add(&attr, cpu,
task, NULL, ctx, event);
#endif
return event_fd;
err_locked:
@@ -10508,102 +10684,6 @@ err_fd:
return err;
}
static struct perf_event *
perf_event_create_kernel_shared_check(struct perf_event_attr *attr, int cpu,
struct task_struct *task,
perf_overflow_handler_t overflow_handler,
void *context)
{
unsigned long idx;
struct perf_event *event;
struct shared_events_str *shrd_events;
/*
* Have to be per cpu events for sharing
*/
if (!shared_events || (u32)cpu >= nr_cpu_ids)
return NULL;
/*
* Can't handle these type requests for sharing right now.
*/
if (task || context || overflow_handler ||
(attr->type != PERF_TYPE_HARDWARE &&
attr->type != PERF_TYPE_RAW))
return NULL;
/*
* Using per_cpu_ptr (or could do cross cpu call which is what most of
* perf does to access per cpu data structures
*/
shrd_events = per_cpu_ptr(shared_events, cpu);
mutex_lock(&shrd_events->list_mutex);
event = NULL;
for_each_set_bit(idx, shrd_events->used_mask, SHARED_EVENTS_MAX) {
if (memcmp(attr, &shrd_events->attr[idx],
sizeof(shrd_events->attr[idx])) == 0) {
atomic_inc(&shrd_events->refcount[idx]);
event = shrd_events->events[idx];
break;
}
}
mutex_unlock(&shrd_events->list_mutex);
return event;
}
static void
perf_event_create_kernel_shared_add(struct perf_event_attr *attr, int cpu,
struct task_struct *task,
perf_overflow_handler_t overflow_handler,
void *context,
struct perf_event *event)
{
unsigned long idx;
struct shared_events_str *shrd_events;
/*
* Have to be per cpu events for sharing
*/
if (!shared_events || (u32)cpu >= nr_cpu_ids)
return;
/*
* Can't handle these type requests for sharing right now.
*/
if (task || context || overflow_handler ||
(attr->type != PERF_TYPE_HARDWARE &&
attr->type != PERF_TYPE_RAW))
return;
/*
* Using per_cpu_ptr (or could do cross cpu call which is what most of
* perf does to access per cpu data structures
*/
shrd_events = per_cpu_ptr(shared_events, cpu);
mutex_lock(&shrd_events->list_mutex);
/*
* If we are in this routine, we know that this event isn't already in
* the shared list. Check if slot available in shared list
*/
idx = find_first_zero_bit(shrd_events->used_mask, SHARED_EVENTS_MAX);
if (idx >= SHARED_EVENTS_MAX)
goto out;
/*
* The event isn't in the list and there is an empty slot so add it.
*/
shrd_events->attr[idx] = *attr;
shrd_events->events[idx] = event;
set_bit(idx, shrd_events->used_mask);
atomic_set(&shrd_events->refcount[idx], 1);
out:
mutex_unlock(&shrd_events->list_mutex);
}
/**
* perf_event_create_kernel_counter
@@ -10622,28 +10702,26 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
struct perf_event *event;
int err;
/*
* Check if the requested attributes match a shared event
*/
event = perf_event_create_kernel_shared_check(attr, cpu,
task, overflow_handler, context);
if (event)
return event;
/*
* Get the target context (task or percpu):
*/
event = perf_event_alloc(attr, cpu, task, NULL, NULL,
overflow_handler, context, -1);
if (IS_ERR(event)) {
err = PTR_ERR(event);
goto err;
event = perf_event_create_kernel_shared_check(attr, cpu, task,
overflow_handler, NULL);
if (!event) {
event = perf_event_alloc(attr, cpu, task, NULL, NULL,
overflow_handler, context, -1);
if (IS_ERR(event)) {
err = PTR_ERR(event);
goto err;
}
}
/* Mark owner so we could distinguish it from user events. */
event->owner = TASK_TOMBSTONE;
if (event->shared)
return event;
/*
* Get the target context (task or percpu):
*/
ctx = find_get_context(event->pmu, task, event);
if (IS_ERR(ctx)) {
err = PTR_ERR(ctx);
