perf: Add support for user and kernel event sharing

The ARM PMU has only a limited number of counters, yet the PMU driver
allocates a new counter even when an equivalent event is already being
counted. To avoid wasting counters, events configured to count the same
thing should share a single counter. Until now such sharing was only
possible for kernel clients, not for user-space clients. Extend it so
that a counter can be shared between user-space clients only, between
kernel clients only, or between user-space and kernel clients. For
events to share a counter, the kernel's and the user's attr->type
(hardware/raw) and attr->config must be the same.
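
To illustrate the sharing criteria from the user-space side (a minimal
sketch, not part of this patch; the helper name is made up): a per-CPU,
non-sampling event whose attr->type and attr->config match an
already-counting event is eligible to be backed by the same PMU counter
when CONFIG_PERF_USER_SHARE is enabled.

/* Illustrative user-space sketch only. */
#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int open_shareable_cycles(int cpu)
{
        struct perf_event_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_HARDWARE;                 /* must match to share */
        attr.config = PERF_COUNT_HW_CPU_CYCLES;         /* must match to share */
        /* No sample_period: sampling events are not shared. */

        /* pid == -1, cpu >= 0: only per-CPU (not per-task) events are shared. */
        return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
}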

Change-Id: I4a4b35bde6beaf8f2aef74e683a9804e31807013
Signed-off-by: Raghavendra Rao Ananta <rananta@codeaurora.org>
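
On the kernel side, a client that creates a matching per-CPU counter
through perf_event_create_kernel_counter() can end up sharing the same
event (again a sketch under the same assumptions, not part of this
patch):

/* Illustrative kernel-client sketch only. */
#include <linux/perf_event.h>

static struct perf_event *create_shareable_cycles(int cpu)
{
        struct perf_event_attr attr = {
                .size   = sizeof(attr),
                .type   = PERF_TYPE_HARDWARE,           /* must match to share */
                .config = PERF_COUNT_HW_CPU_CYCLES,     /* must match to share */
        };

        /*
         * No task, no overflow handler, no sampling: with this change, if
         * an identical per-CPU event already exists in the shared list,
         * the existing event is returned instead of allocating a new
         * counter.
         */
        return perf_event_create_kernel_counter(&attr, cpu, NULL, NULL, NULL);
}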
parent ef790030bb
commit f0453c73e1
 include/linux/perf_event.h |   3
 init/Kconfig               |  10
 kernel/events/core.c       | 336

@@ -716,6 +716,9 @@ struct perf_event {
* CPU wakes up and will be removed from the list after that
*/
struct list_head dormant_event_entry;
/* Is this event shared with other events */
bool shared;
#endif /* CONFIG_PERF_EVENTS */
};

@@ -1522,6 +1522,16 @@ config PERF_EVENTS
Say Y if unsure.
config PERF_USER_SHARE
bool "Perf event sharing with user-space"
help
Say Y here to enable sharing of events with user-space. An event can
be shared with other user-space events or with kernel-created events
that have the same config and type attributes.
Say N if unsure.
config DEBUG_PERF_USE_VMALLOC
default n
bool "Debug: use vmalloc to back perf mmap() buffers"

@@ -1770,6 +1770,10 @@ static void perf_group_detach(struct perf_event *event)
if (event->group_leader != event) {
list_del_init(&event->group_entry);
event->group_leader->nr_siblings--;
if (event->shared)
event->group_leader = event;
goto out;
}
@@ -4462,15 +4466,23 @@ int perf_event_release_kernel(struct perf_event *event)
if (!is_kernel_event(event)) {
perf_remove_from_owner(event);
} else {
if (perf_event_delete_kernel_shared(event) > 0)
return 0;
}
ctx = perf_event_ctx_lock(event);
WARN_ON_ONCE(ctx->parent_ctx);
perf_remove_from_context(event, DETACH_GROUP);
if (perf_event_delete_kernel_shared(event) > 0) {
perf_event__state_init(event);
perf_install_in_context(ctx, event, event->cpu);
perf_event_ctx_unlock(event, ctx);
perf_event_enable(event);
return 0;
}
raw_spin_lock_irq(&ctx->lock);
/*
* Mark this event as STATE_DEAD, there is no external reference to it
@@ -9614,6 +9626,122 @@ enabled:
account_pmu_sb_event(event);
}
static struct perf_event *
perf_event_create_kernel_shared_check(struct perf_event_attr *attr, int cpu,
struct task_struct *task,
perf_overflow_handler_t overflow_handler,
struct perf_event *group_leader)
{
unsigned long idx;
struct perf_event *event;
struct shared_events_str *shrd_events;
/*
* Have to be per cpu events for sharing
*/
if (!shared_events || (u32)cpu >= nr_cpu_ids)
return NULL;
/*
* Can't handle these types of requests for sharing right now.
*/
if (task || overflow_handler || attr->sample_period ||
(attr->type != PERF_TYPE_HARDWARE &&
attr->type != PERF_TYPE_RAW)) {
return NULL;
}
/*
* Using per_cpu_ptr() (or we could do a cross-CPU call, which is what
* most of perf does to access per-CPU data structures).
*/
shrd_events = per_cpu_ptr(shared_events, cpu);
mutex_lock(&shrd_events->list_mutex);
event = NULL;
for_each_set_bit(idx, shrd_events->used_mask, SHARED_EVENTS_MAX) {
/*
* Compare the attr structures field by field rather than with
* memcmp(): user-space and the kernel might be built against
* different versions of perf, so the field offsets and the overall
* structure size are not guaranteed to match.
*/
if (attr->type == shrd_events->attr[idx].type &&
attr->config == shrd_events->attr[idx].config) {
event = shrd_events->events[idx];
/* Do not change the group for this shared event */
if (group_leader && event->group_leader != event) {
event = NULL;
continue;
}
event->shared = true;
atomic_inc(&shrd_events->refcount[idx]);
break;
}
}
mutex_unlock(&shrd_events->list_mutex);
return event;
}
static void
perf_event_create_kernel_shared_add(struct perf_event_attr *attr, int cpu,
struct task_struct *task,
perf_overflow_handler_t overflow_handler,
void *context,
struct perf_event *event)
{
unsigned long idx;
struct shared_events_str *shrd_events;
/*
* Have to be per cpu events for sharing
*/
if (!shared_events || (u32)cpu >= nr_cpu_ids)
return;
/*
* Can't handle these types of requests for sharing right now.
*/
if (overflow_handler || attr->sample_period ||
(attr->type != PERF_TYPE_HARDWARE &&
attr->type != PERF_TYPE_RAW)) {
return;
}
/*
* Using per_cpu_ptr() (or we could do a cross-CPU call, which is what
* most of perf does to access per-CPU data structures).
*/
shrd_events = per_cpu_ptr(shared_events, cpu);
mutex_lock(&shrd_events->list_mutex);
/*
* If we are in this routine, we know that this event isn't already in
* the shared list. Check whether a slot is available in the shared list.
*/
idx = find_first_zero_bit(shrd_events->used_mask, SHARED_EVENTS_MAX);
if (idx >= SHARED_EVENTS_MAX)
goto out;
/*
* The event isn't in the list and there is an empty slot so add it.
*/
shrd_events->attr[idx] = *attr;
shrd_events->events[idx] = event;
set_bit(idx, shrd_events->used_mask);
atomic_set(&shrd_events->refcount[idx], 1);
out:
mutex_unlock(&shrd_events->list_mutex);
}
/*
* Allocate and initialize an event structure
*/
@@ -10084,6 +10212,31 @@ again:
return gctx;
}
#ifdef CONFIG_PERF_USER_SHARE
static void perf_group_shared_event(struct perf_event *event,
struct perf_event *group_leader)
{
if (!event->shared || !group_leader)
return;
/* Do not attempt to change the group for this shared event */
if (event->group_leader != event)
return;
/*
* A standalone event is its own group leader. Now that there is a
* new group to attach to, remove the event from its previous context
* and attach it to the new group.
*/
perf_remove_from_context(event, DETACH_GROUP);
event->group_leader = group_leader;
perf_event__state_init(event);
perf_install_in_context(group_leader->ctx, event, event->cpu);
}
#endif
/**
* sys_perf_event_open - open a performance event, associate it to a task/cpu
*
@@ -10097,7 +10250,7 @@ SYSCALL_DEFINE5(perf_event_open,
pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
{
struct perf_event *group_leader = NULL, *output_event = NULL;
struct perf_event *event, *sibling;
struct perf_event *event = NULL, *sibling;
struct perf_event_attr attr;
struct perf_event_context *ctx, *uninitialized_var(gctx);
struct file *event_file = NULL;
@@ -10209,11 +10362,17 @@ SYSCALL_DEFINE5(perf_event_open,
if (flags & PERF_FLAG_PID_CGROUP)
cgroup_fd = pid;
event = perf_event_alloc(&attr, cpu, task, group_leader, NULL,
NULL, NULL, cgroup_fd);
if (IS_ERR(event)) {
err = PTR_ERR(event);
goto err_cred;
#ifdef CONFIG_PERF_USER_SHARE
event = perf_event_create_kernel_shared_check(&attr, cpu, task, NULL,
group_leader);
#endif
if (!event) {
event = perf_event_alloc(&attr, cpu, task, group_leader, NULL,
NULL, NULL, cgroup_fd);
if (IS_ERR(event)) {
err = PTR_ERR(event);
goto err_cred;
}
}
if (is_sampling_event(event)) {
@@ -10380,7 +10539,7 @@ SYSCALL_DEFINE5(perf_event_open,
* Must be under the same ctx::mutex as perf_install_in_context(),
* because we need to serialize with concurrent event creation.
*/
if (!exclusive_event_installable(event, ctx)) {
if (!event->shared && !exclusive_event_installable(event, ctx)) {
/* exclusive and group stuff are assumed mutually exclusive */
WARN_ON_ONCE(move_group);
@@ -10451,10 +10610,17 @@ SYSCALL_DEFINE5(perf_event_open,
perf_event__header_size(event);
perf_event__id_header_size(event);
event->owner = current;
#ifdef CONFIG_PERF_USER_SHARE
if (event->shared && group_leader)
perf_group_shared_event(event, group_leader);
#endif
if (!event->shared) {
event->owner = current;
perf_install_in_context(ctx, event, event->cpu);
perf_unpin_context(ctx);
perf_install_in_context(ctx, event, event->cpu);
perf_unpin_context(ctx);
}
if (move_group)
perf_event_ctx_unlock(group_leader, gctx);
@@ -10465,9 +10631,11 @@ SYSCALL_DEFINE5(perf_event_open,
put_task_struct(task);
}
mutex_lock(&current->perf_event_mutex);
list_add_tail(&event->owner_entry, &current->perf_event_list);
mutex_unlock(&current->perf_event_mutex);
if (!event->shared) {
mutex_lock(&current->perf_event_mutex);
list_add_tail(&event->owner_entry, &current->perf_event_list);
mutex_unlock(&current->perf_event_mutex);
}
/*
* Drop the reference on the group_event after placing the
@@ -10477,6 +10645,14 @@
*/
fdput(group);
fd_install(event_fd, event_file);
#ifdef CONFIG_PERF_USER_SHARE
/* Add the event to the shared events list */
if (!event->shared)
perf_event_create_kernel_shared_add(&attr, cpu,
task, NULL, ctx, event);
#endif
return event_fd;
err_locked:
@@ -10508,102 +10684,6 @@ err_fd:
return err;
}
static struct perf_event *
perf_event_create_kernel_shared_check(struct perf_event_attr *attr, int cpu,
struct task_struct *task,
perf_overflow_handler_t overflow_handler,
void *context)
{
unsigned long idx;
struct perf_event *event;
struct shared_events_str *shrd_events;
/*
* Have to be per cpu events for sharing
*/
if (!shared_events || (u32)cpu >= nr_cpu_ids)
return NULL;
/*
* Can't handle these type requests for sharing right now.
*/
if (task || context || overflow_handler ||
(attr->type != PERF_TYPE_HARDWARE &&
attr->type != PERF_TYPE_RAW))
return NULL;
/*
* Using per_cpu_ptr (or could do cross cpu call which is what most of
* perf does to access per cpu data structures
*/
shrd_events = per_cpu_ptr(shared_events, cpu);
mutex_lock(&shrd_events->list_mutex);
event = NULL;
for_each_set_bit(idx, shrd_events->used_mask, SHARED_EVENTS_MAX) {
if (memcmp(attr, &shrd_events->attr[idx],
sizeof(shrd_events->attr[idx])) == 0) {
atomic_inc(&shrd_events->refcount[idx]);
event = shrd_events->events[idx];
break;
}
}
mutex_unlock(&shrd_events->list_mutex);
return event;
}
static void
perf_event_create_kernel_shared_add(struct perf_event_attr *attr, int cpu,
struct task_struct *task,
perf_overflow_handler_t overflow_handler,
void *context,
struct perf_event *event)
{
unsigned long idx;
struct shared_events_str *shrd_events;
/*
* Have to be per cpu events for sharing
*/
if (!shared_events || (u32)cpu >= nr_cpu_ids)
return;
/*
* Can't handle these type requests for sharing right now.
*/
if (task || context || overflow_handler ||
(attr->type != PERF_TYPE_HARDWARE &&
attr->type != PERF_TYPE_RAW))
return;
/*
* Using per_cpu_ptr (or could do cross cpu call which is what most of
* perf does to access per cpu data structures
*/
shrd_events = per_cpu_ptr(shared_events, cpu);
mutex_lock(&shrd_events->list_mutex);
/*
* If we are in this routine, we know that this event isn't already in
* the shared list. Check if slot available in shared list
*/
idx = find_first_zero_bit(shrd_events->used_mask, SHARED_EVENTS_MAX);
if (idx >= SHARED_EVENTS_MAX)
goto out;
/*
* The event isn't in the list and there is an empty slot so add it.
*/
shrd_events->attr[idx] = *attr;
shrd_events->events[idx] = event;
set_bit(idx, shrd_events->used_mask);
atomic_set(&shrd_events->refcount[idx], 1);
out:
mutex_unlock(&shrd_events->list_mutex);
}
/**
* perf_event_create_kernel_counter
@@ -10622,28 +10702,26 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
struct perf_event *event;
int err;
/*
* Check if the requested attributes match a shared event
*/
event = perf_event_create_kernel_shared_check(attr, cpu,
task, overflow_handler, context);
if (event)
return event;
/*
* Get the target context (task or percpu):
*/
event = perf_event_alloc(attr, cpu, task, NULL, NULL,
overflow_handler, context, -1);
if (IS_ERR(event)) {
err = PTR_ERR(event);
goto err;
event = perf_event_create_kernel_shared_check(attr, cpu, task,
overflow_handler, NULL);
if (!event) {
event = perf_event_alloc(attr, cpu, task, NULL, NULL,
overflow_handler, context, -1);
if (IS_ERR(event)) {
err = PTR_ERR(event);
goto err;
}
}
/* Mark owner so we could distinguish it from user events. */
event->owner = TASK_TOMBSTONE;
if (event->shared)
return event;
/*
* Get the target context (task or percpu):
*/
ctx = find_get_context(event->pmu, task, event);
if (IS_ERR(ctx)) {
err = PTR_ERR(ctx);
