Pull perf fixes and cleanups from Ingo Molnar: "A kernel fix plus mostly tooling fixes, but also some tooling restructuring and cleanups" * 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (39 commits) perf: Fix building warning on ARM 32 perf symbols: Fix use after free in filename__read_build_id perf evlist: Use roundup_pow_of_two tools: Adopt roundup_pow_of_two perf tools: Make the mmap length autotuning more robust tools: Adopt rounddown_pow_of_two and deps tools: Adopt fls_long and deps tools: Move bitops.h from tools/perf/util to tools/ tools: Introduce asm-generic/bitops.h tools lib: Move asm-generic/bitops/find.h code to tools/include and tools/lib tools: Whitespace prep patches for moving bitops.h tools: Move code originally from asm-generic/atomic.h into tools/include/asm-generic/ tools: Move code originally from linux/log2.h to tools/include/linux/ tools: Move __ffs implementation to tools/include/asm-generic/bitops/__ffs.h perf evlist: Do not use hard coded value for a mmap_pages default perf trace: Let the perf_evlist__mmap autosize the number of pages to use perf evlist: Improve the strerror_mmap method perf evlist: Clarify sterror_mmap variable names perf evlist: Fixup brown paper bag on "hint" for --mmap-pages cmdline arg perf trace: Provide a better explanation when mmap fails ...tirimbino
commit
88a57667f2
@ -0,0 +1,27 @@ |
||||
#ifndef __TOOLS_ASM_GENERIC_BITOPS_H |
||||
#define __TOOLS_ASM_GENERIC_BITOPS_H |
||||
|
||||
/*
|
||||
* tools/ copied this from include/asm-generic/bitops.h, bit by bit as it needed |
||||
* some functions. |
||||
* |
||||
* For the benefit of those who are trying to port Linux to another |
||||
* architecture, here are some C-language equivalents. You should |
||||
* recode these in the native assembly language, if at all possible. |
||||
* |
||||
* C language equivalents written by Theodore Ts'o, 9/26/92 |
||||
*/ |
||||
|
||||
#include <asm-generic/bitops/__ffs.h> |
||||
#include <asm-generic/bitops/fls.h> |
||||
#include <asm-generic/bitops/__fls.h> |
||||
#include <asm-generic/bitops/fls64.h> |
||||
#include <asm-generic/bitops/find.h> |
||||
|
||||
#ifndef _TOOLS_LINUX_BITOPS_H_ |
||||
#error only <linux/bitops.h> can be included directly |
||||
#endif |
||||
|
||||
#include <asm-generic/bitops/atomic.h> |
||||
|
||||
#endif /* __TOOLS_ASM_GENERIC_BITOPS_H */ |
@ -0,0 +1,43 @@ |
||||
#ifndef _TOOLS_LINUX_ASM_GENERIC_BITOPS___FFS_H_ |
||||
#define _TOOLS_LINUX_ASM_GENERIC_BITOPS___FFS_H_ |
||||
|
||||
#include <asm/types.h> |
||||
|
||||
/**
 * __ffs - locate the least significant set bit of a word
 * @word: value to scan
 *
 * Returns the zero-based index of the lowest set bit. The result is
 * meaningless when @word is 0, so callers must test for that first.
 */
static __always_inline unsigned long __ffs(unsigned long word)
{
	unsigned long pos = 0;
	unsigned int width;

	/* Binary search: begin with half of the word, then keep halving. */
#if __BITS_PER_LONG == 64
	width = 32;
#else
	width = 16;
#endif
	for (; width != 0; width >>= 1) {
		/* Low 'width' bits all clear: the answer lies above them. */
		if ((word & ((1UL << width) - 1)) == 0) {
			pos += width;
			word >>= width;
		}
	}
	return pos;
}
||||
|
||||
#endif /* _TOOLS_LINUX_ASM_GENERIC_BITOPS___FFS_H_ */ |
@ -0,0 +1 @@ |
||||
#include <../../../../include/asm-generic/bitops/__fls.h> |
@ -0,0 +1,22 @@ |
||||
#ifndef _TOOLS_LINUX_ASM_GENERIC_BITOPS_ATOMIC_H_ |
||||
#define _TOOLS_LINUX_ASM_GENERIC_BITOPS_ATOMIC_H_ |
||||
|
||||
#include <asm/types.h> |
||||
|
||||
/*
 * set_bit - turn on bit @nr of the bitmap starting at @addr.
 * This is a plain read-modify-write of the containing word.
 */
static inline void set_bit(int nr, unsigned long *addr)
{
	unsigned long *word = addr + nr / __BITS_PER_LONG;

	*word |= 1UL << (nr % __BITS_PER_LONG);
}
||||
|
||||
/*
 * clear_bit - turn off bit @nr of the bitmap starting at @addr.
 * This is a plain read-modify-write of the containing word.
 */
static inline void clear_bit(int nr, unsigned long *addr)
{
	const unsigned long mask = 1UL << (nr % __BITS_PER_LONG);

	addr[nr / __BITS_PER_LONG] &= ~mask;
}
||||
|
||||
/*
 * test_bit - report whether bit @nr of the bitmap at @addr is set.
 * Returns 1 when the bit is set and 0 otherwise.
 */
static __always_inline int test_bit(unsigned int nr, const unsigned long *addr)
{
	const unsigned long word = addr[nr / __BITS_PER_LONG];

	return ((word >> (nr % __BITS_PER_LONG)) & 1UL) ? 1 : 0;
}
||||
|
||||
#endif /* _TOOLS_LINUX_ASM_GENERIC_BITOPS_ATOMIC_H_ */ |
@ -0,0 +1,33 @@ |
||||
#ifndef _TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_ |
||||
#define _TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_ |
||||
|
||||
#ifndef find_next_bit |
||||
/**
|
||||
* find_next_bit - find the next set bit in a memory region |
||||
* @addr: The address to base the search on |
||||
* @offset: The bitnumber to start searching at |
||||
* @size: The bitmap size in bits |
||||
* |
||||
* Returns the bit number for the next set bit |
||||
* If no bits are set, returns @size. |
||||
*/ |
||||
extern unsigned long find_next_bit(const unsigned long *addr, unsigned long |
||||
size, unsigned long offset); |
||||
#endif |
||||
|
||||
#ifndef find_first_bit |
||||
|
||||
/**
|
||||
* find_first_bit - find the first set bit in a memory region |
||||
* @addr: The address to start the search at |
||||
* @size: The maximum number of bits to search |
||||
* |
||||
* Returns the bit number of the first set bit. |
||||
* If no bits are set, returns @size. |
||||
*/ |
||||
extern unsigned long find_first_bit(const unsigned long *addr, |
||||
unsigned long size); |
||||
|
||||
#endif /* find_first_bit */ |
||||
|
||||
#endif /*_TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_ */ |
@ -0,0 +1 @@ |
||||
#include <../../../../include/asm-generic/bitops/fls.h> |
@ -0,0 +1 @@ |
||||
#include <../../../../include/asm-generic/bitops/fls64.h> |
@ -0,0 +1,53 @@ |
||||
#ifndef _TOOLS_LINUX_BITOPS_H_ |
||||
#define _TOOLS_LINUX_BITOPS_H_ |
||||
|
||||
#include <linux/kernel.h> |
||||
#include <linux/compiler.h> |
||||
#include <asm/hweight.h> |
||||
|
||||
#ifndef __WORDSIZE |
||||
#define __WORDSIZE (__SIZEOF_LONG__ * 8) |
||||
#endif |
||||
|
||||
#define BITS_PER_LONG __WORDSIZE |
||||
|
||||
#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) |
||||
#define BIT_WORD(nr) ((nr) / BITS_PER_LONG) |
||||
#define BITS_PER_BYTE 8 |
||||
#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) |
||||
#define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64)) |
||||
#define BITS_TO_U32(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u32)) |
||||
#define BITS_TO_BYTES(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE) |
||||
|
||||
/*
|
||||
* Include this here because some architectures need generic_ffs/fls in |
||||
* scope |
||||
* |
||||
* XXX: this needs to be asm/bitops.h, when we get to per arch optimizations |
||||
*/ |
||||
#include <asm-generic/bitops.h> |
||||
|
||||
#define for_each_set_bit(bit, addr, size) \ |
||||
for ((bit) = find_first_bit((addr), (size)); \
|
||||
(bit) < (size); \
|
||||
(bit) = find_next_bit((addr), (size), (bit) + 1)) |
||||
|
||||
/* same as for_each_set_bit() but use bit as value to start with */ |
||||
#define for_each_set_bit_from(bit, addr, size) \ |
||||
for ((bit) = find_next_bit((addr), (size), (bit)); \
|
||||
(bit) < (size); \
|
||||
(bit) = find_next_bit((addr), (size), (bit) + 1)) |
||||
|
||||
/*
 * hweight_long - hweight of an unsigned long, dispatching to hweight32()
 * or hweight64() according to the size of long on this build.
 */
static inline unsigned long hweight_long(unsigned long w)
{
	if (sizeof(w) == 4)
		return hweight32(w);

	return hweight64(w);
}
||||
|
||||
/*
 * fls_long - fls() for an unsigned long: uses fls() when long is 4 bytes
 * wide and fls64() otherwise.
 */
static inline unsigned fls_long(unsigned long l)
{
	return sizeof(l) == 4 ? fls(l) : fls64(l);
}
||||
|
||||
#endif |
@ -0,0 +1,185 @@ |
||||
/* Integer base 2 logarithm calculation
|
||||
* |
||||
* Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. |
||||
* Written by David Howells (dhowells@redhat.com) |
||||
* |
||||
* This program is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU General Public License |
||||
* as published by the Free Software Foundation; either version |
||||
* 2 of the License, or (at your option) any later version. |
||||
*/ |
||||
|
||||
#ifndef _TOOLS_LINUX_LOG2_H |
||||
#define _TOOLS_LINUX_LOG2_H |
||||
|
||||
/*
|
||||
* deal with unrepresentable constant logarithms |
||||
*/ |
||||
extern __attribute__((const, noreturn)) |
||||
int ____ilog2_NaN(void); |
||||
|
||||
/*
|
||||
* non-constant log of base 2 calculators |
||||
* - the arch may override these in asm/bitops.h if they can be implemented |
||||
* more efficiently than using fls() and fls64() |
||||
* - the arch is not required to handle n==0 if implementing the fallback |
||||
*/ |
||||
static inline __attribute__((const)) |
||||
int __ilog2_u32(u32 n) |
||||
{ |
||||
return fls(n) - 1; |
||||
} |
||||
|
||||
static inline __attribute__((const)) |
||||
int __ilog2_u64(u64 n) |
||||
{ |
||||
return fls64(n) - 1; |
||||
} |
||||
|
||||
/*
 * is_power_of_2 - true when exactly one bit of @n is set.
 * Zero is *not* treated as a power of two.
 */

static inline __attribute__((const))
bool is_power_of_2(unsigned long n)
{
	/* n & (n - 1) clears the lowest set bit; nothing may remain. */
	const unsigned long without_lowest = n & (n - 1);

	return n != 0 && without_lowest == 0;
}
||||
|
||||
/*
 * __roundup_pow_of_two - non-constant round up to a power of two,
 * computed as 1 shifted left by fls_long(n - 1).
 */
static inline __attribute__((const))
unsigned long __roundup_pow_of_two(unsigned long n)
{
	const unsigned int shift = fls_long(n - 1);

	return 1UL << shift;
}
||||
|
||||
/*
 * __rounddown_pow_of_two - non-constant round down to a power of two,
 * computed as 1 shifted left by fls_long(n) - 1.
 */
static inline __attribute__((const))
unsigned long __rounddown_pow_of_two(unsigned long n)
{
	const unsigned int shift = fls_long(n) - 1;

	return 1UL << shift;
}
||||
|
||||
/**
 * ilog2 - base-2 logarithm of a 32-bit or 64-bit unsigned value
 * @n: value to take the logarithm of
 *
 * Constant-capable: it can be used to initialise global variables from
 * constant data. For a constant argument the result is selected by a chain
 * of ternary operators that tests bit 63 down to bit 0, which is why the
 * macro is so long; a constant below 1 resolves to ____ilog2_NaN().
 *
 * For a non-constant argument, __ilog2_u32() or __ilog2_u64() is chosen
 * according to sizeof(n).
 */
#define ilog2(n)				\
(						\
	__builtin_constant_p(n) ? (		\
		(n) < 1 ? ____ilog2_NaN() :	\
		(n) & (1ULL << 63) ? 63 :	\
		(n) & (1ULL << 62) ? 62 :	\
		(n) & (1ULL << 61) ? 61 :	\
		(n) & (1ULL << 60) ? 60 :	\
		(n) & (1ULL << 59) ? 59 :	\
		(n) & (1ULL << 58) ? 58 :	\
		(n) & (1ULL << 57) ? 57 :	\
		(n) & (1ULL << 56) ? 56 :	\
		(n) & (1ULL << 55) ? 55 :	\
		(n) & (1ULL << 54) ? 54 :	\
		(n) & (1ULL << 53) ? 53 :	\
		(n) & (1ULL << 52) ? 52 :	\
		(n) & (1ULL << 51) ? 51 :	\
		(n) & (1ULL << 50) ? 50 :	\
		(n) & (1ULL << 49) ? 49 :	\
		(n) & (1ULL << 48) ? 48 :	\
		(n) & (1ULL << 47) ? 47 :	\
		(n) & (1ULL << 46) ? 46 :	\
		(n) & (1ULL << 45) ? 45 :	\
		(n) & (1ULL << 44) ? 44 :	\
		(n) & (1ULL << 43) ? 43 :	\
		(n) & (1ULL << 42) ? 42 :	\
		(n) & (1ULL << 41) ? 41 :	\
		(n) & (1ULL << 40) ? 40 :	\
		(n) & (1ULL << 39) ? 39 :	\
		(n) & (1ULL << 38) ? 38 :	\
		(n) & (1ULL << 37) ? 37 :	\
		(n) & (1ULL << 36) ? 36 :	\
		(n) & (1ULL << 35) ? 35 :	\
		(n) & (1ULL << 34) ? 34 :	\
		(n) & (1ULL << 33) ? 33 :	\
		(n) & (1ULL << 32) ? 32 :	\
		(n) & (1ULL << 31) ? 31 :	\
		(n) & (1ULL << 30) ? 30 :	\
		(n) & (1ULL << 29) ? 29 :	\
		(n) & (1ULL << 28) ? 28 :	\
		(n) & (1ULL << 27) ? 27 :	\
		(n) & (1ULL << 26) ? 26 :	\
		(n) & (1ULL << 25) ? 25 :	\
		(n) & (1ULL << 24) ? 24 :	\
		(n) & (1ULL << 23) ? 23 :	\
		(n) & (1ULL << 22) ? 22 :	\
		(n) & (1ULL << 21) ? 21 :	\
		(n) & (1ULL << 20) ? 20 :	\
		(n) & (1ULL << 19) ? 19 :	\
		(n) & (1ULL << 18) ? 18 :	\
		(n) & (1ULL << 17) ? 17 :	\
		(n) & (1ULL << 16) ? 16 :	\
		(n) & (1ULL << 15) ? 15 :	\
		(n) & (1ULL << 14) ? 14 :	\
		(n) & (1ULL << 13) ? 13 :	\
		(n) & (1ULL << 12) ? 12 :	\
		(n) & (1ULL << 11) ? 11 :	\
		(n) & (1ULL << 10) ? 10 :	\
		(n) & (1ULL <<  9) ?  9 :	\
		(n) & (1ULL <<  8) ?  8 :	\
		(n) & (1ULL <<  7) ?  7 :	\
		(n) & (1ULL <<  6) ?  6 :	\
		(n) & (1ULL <<  5) ?  5 :	\
		(n) & (1ULL <<  4) ?  4 :	\
		(n) & (1ULL <<  3) ?  3 :	\
		(n) & (1ULL <<  2) ?  2 :	\
		(n) & (1ULL <<  1) ?  1 :	\
		(n) & (1ULL <<  0) ?  0 :	\
		____ilog2_NaN()			\
				   ) :		\
	(sizeof(n) <= 4) ?			\
	__ilog2_u32(n) :			\
	__ilog2_u64(n)				\
 )
||||
|
||||
/**
 * roundup_pow_of_two - round the given value up to nearest power of two
 * @n: value to round up
 *
 * round the given value up to the nearest power of two
 * - the result is undefined when n == 0
 * - this can be used to initialise global variables from constant data
 *
 * Constant arguments are evaluated via ilog2(); everything else goes to
 * __roundup_pow_of_two(). Every use of @n is parenthesized so that an
 * expression argument such as "a & b" is compared as a whole rather than
 * being re-associated with the "== 1" test.
 */
#define roundup_pow_of_two(n)			\
(						\
	__builtin_constant_p(n) ? (		\
		((n) == 1) ? 1 :		\
		(1UL << (ilog2((n) - 1) + 1))	\
				   ) :		\
	__roundup_pow_of_two(n)			\
 )
||||
|
||||
/**
 * rounddown_pow_of_two - round the given value down to nearest power of two
 * @n: value to round down
 *
 * Constant arguments are evaluated as 1UL << ilog2(n), so the macro can be
 * used to initialise global variables from constant data; anything else is
 * passed to __rounddown_pow_of_two().
 * - the result is undefined when n == 0
 */
#define rounddown_pow_of_two(n)			\
(						\
	__builtin_constant_p(n) ?		\
		(1UL << ilog2(n)) :		\
		__rounddown_pow_of_two(n)	\
)
||||
|
||||
#endif /* _TOOLS_LINUX_LOG2_H */ |
@ -0,0 +1,89 @@ |
||||
/* find_next_bit.c: fallback find next bit implementation
|
||||
* |
||||
* Copied from lib/find_next_bit.c to tools/lib/next_bit.c |
||||
* |
||||
* Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. |
||||
* Written by David Howells (dhowells@redhat.com) |
||||
* |
||||
* This program is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU General Public License |
||||
* as published by the Free Software Foundation; either version |
||||
* 2 of the License, or (at your option) any later version. |
||||
*/ |
||||
|
||||
#include <linux/bitops.h> |
||||
#include <asm/types.h> |
||||
#include <asm/byteorder.h> |
||||
|
||||
#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) |
||||
|
||||
#ifndef find_next_bit |
||||
/*
|
||||
* Find the next set bit in a memory region. |
||||
*/ |
||||
unsigned long find_next_bit(const unsigned long *addr, unsigned long size, |
||||
unsigned long offset) |
||||
{ |
||||
const unsigned long *p = addr + BITOP_WORD(offset); |
||||
unsigned long result = offset & ~(BITS_PER_LONG-1); |
||||
unsigned long tmp; |
||||
|
||||
if (offset >= size) |
||||
return size; |
||||
size -= result; |
||||
offset %= BITS_PER_LONG; |
||||
if (offset) { |
||||
tmp = *(p++); |
||||
tmp &= (~0UL << offset); |
||||
if (size < BITS_PER_LONG) |
||||
goto found_first; |
||||
if (tmp) |
||||
goto found_middle; |
||||
size -= BITS_PER_LONG; |
||||
result += BITS_PER_LONG; |
||||
} |
||||
while (size & ~(BITS_PER_LONG-1)) { |
||||
if ((tmp = *(p++))) |
||||
goto found_middle; |
||||
result += BITS_PER_LONG; |
||||
size -= BITS_PER_LONG; |
||||
} |
||||
if (!size) |
||||
return result; |
||||
tmp = *p; |
||||
|
||||
found_first: |
||||
tmp &= (~0UL >> (BITS_PER_LONG - size)); |
||||
if (tmp == 0UL) /* Are any bits set? */ |
||||
return result + size; /* Nope. */ |
||||
found_middle: |
||||
return result + __ffs(tmp); |
||||
} |
||||
#endif |
||||
|
||||
#ifndef find_first_bit |
||||
/*
|
||||
* Find the first set bit in a memory region. |
||||
*/ |
||||
unsigned long find_first_bit(const unsigned long *addr, unsigned long size) |
||||
{ |
||||
const unsigned long *p = addr; |
||||
unsigned long result = 0; |
||||
unsigned long tmp; |
||||
|
||||
while (size & ~(BITS_PER_LONG-1)) { |
||||
if ((tmp = *(p++))) |
||||
goto found; |
||||
result += BITS_PER_LONG; |
||||
size -= BITS_PER_LONG; |
||||
} |
||||
if (!size) |
||||
return result; |
||||
|
||||
tmp = (*p) & (~0UL >> (BITS_PER_LONG - size)); |
||||
if (tmp == 0UL) /* Are any bits set? */ |
||||
return result + size; /* Nope. */ |
||||
found: |
||||
return result + __ffs(tmp); |
||||
} |
||||
#endif |
@ -1,304 +0,0 @@ |
||||
/*
|
||||
* mem-memset.c |
||||
* |
||||
* memset: Simple memory set in various ways |
||||
* |
||||
* Trivial clone of mem-memcpy.c. |
||||
*/ |
||||
|
||||
#include "../perf.h" |
||||
#include "../util/util.h" |
||||
#include "../util/parse-options.h" |
||||
#include "../util/header.h" |
||||
#include "../util/cloexec.h" |
||||
#include "bench.h" |
||||
#include "mem-memset-arch.h" |
||||
|
||||
#include <stdio.h> |
||||
#include <stdlib.h> |
||||
#include <string.h> |
||||
#include <sys/time.h> |
||||
#include <errno.h> |
||||
|
||||
#define K 1024 |
||||
|
||||
static const char *length_str = "1MB"; |
||||
static const char *routine = "default"; |
||||
static int iterations = 1; |
||||
static bool use_cycle; |
||||
static int cycle_fd; |
||||
static bool only_prefault; |
||||
static bool no_prefault; |
||||
|
||||
static const struct option options[] = { |
||||
OPT_STRING('l', "length", &length_str, "1MB", |
||||
"Specify length of memory to set. " |
||||
"Available units: B, KB, MB, GB and TB (upper and lower)"), |
||||
OPT_STRING('r', "routine", &routine, "default", |
||||
"Specify routine to set"), |
||||
OPT_INTEGER('i', "iterations", &iterations, |
||||
"repeat memset() invocation this number of times"), |
||||
OPT_BOOLEAN('c', "cycle", &use_cycle, |
||||
"Use cycles event instead of gettimeofday() for measuring"), |
||||
OPT_BOOLEAN('o', "only-prefault", &only_prefault, |
||||
"Show only the result with page faults before memset()"), |
||||
OPT_BOOLEAN('n', "no-prefault", &no_prefault, |
||||
"Show only the result without page faults before memset()"), |
||||
OPT_END() |
||||
}; |
||||
|
||||
typedef void *(*memset_t)(void *, int, size_t); |
||||
|
||||
struct routine { |
||||
const char *name; |
||||
const char *desc; |
||||
memset_t fn; |
||||
}; |
||||
|
||||
static const struct routine routines[] = { |
||||
{ "default", |
||||
"Default memset() provided by glibc", |
||||
memset }, |
||||
#ifdef HAVE_ARCH_X86_64_SUPPORT |
||||
|
||||
#define MEMSET_FN(fn, name, desc) { name, desc, fn }, |
||||
#include "mem-memset-x86-64-asm-def.h" |
||||
#undef MEMSET_FN |
||||
|
||||
#endif |
||||
|
||||
{ NULL, |
||||
NULL, |
||||
NULL } |
||||
}; |
||||
|
||||
static const char * const bench_mem_memset_usage[] = { |
||||
"perf bench mem memset <options>", |
||||
NULL |
||||
}; |
||||
|
||||
static struct perf_event_attr cycle_attr = { |
||||
.type = PERF_TYPE_HARDWARE, |
||||
.config = PERF_COUNT_HW_CPU_CYCLES |
||||
}; |
||||
|
||||
static void init_cycle(void) |
||||
{ |
||||
cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, |
||||
perf_event_open_cloexec_flag()); |
||||
|
||||
if (cycle_fd < 0 && errno == ENOSYS) |
||||
die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); |
||||
else |
||||
BUG_ON(cycle_fd < 0); |
||||
} |
||||
|
||||
static u64 get_cycle(void) |
||||
{ |
||||
int ret; |
||||
u64 clk; |
||||
|
||||
ret = read(cycle_fd, &clk, sizeof(u64)); |
||||
BUG_ON(ret != sizeof(u64)); |
||||
|
||||
return clk; |
||||
} |
||||
|
||||
/* Convert a struct timeval to seconds expressed as a double. */
static double timeval2double(struct timeval *ts)
{
	const double usec_per_sec = (double)1000000;
	const double whole = (double)ts->tv_sec;
	const double frac = (double)ts->tv_usec / usec_per_sec;

	return whole + frac;
}
||||
|
||||
/*
 * Allocate @length bytes with zalloc() and store the pointer in *@dst.
 * Calls die() when the allocation fails.
 */
static void alloc_mem(void **dst, size_t length)
{
	void *buf = zalloc(length);

	*dst = buf;
	if (buf == NULL)
		die("memory allocation failed - maybe length is too large?\n");
}
||||
|
||||
/*
 * Run @fn over a fresh @len-byte buffer 'iterations' times and return the
 * difference between the counter readings taken before and after the loop.
 * With @prefault set, the buffer is written once before the first reading.
 */
static u64 do_memset_cycle(memset_t fn, size_t len, bool prefault)
{
	u64 before, after;
	void *buf = NULL;
	int iter = 0;

	alloc_mem(&buf, len);

	/* Untimed warm-up pass over the whole buffer. */
	if (prefault)
		fn(buf, -1, len);

	before = get_cycle();
	while (iter < iterations) {
		fn(buf, iter, len);
		iter++;
	}
	after = get_cycle();

	free(buf);
	return after - before;
}
||||
|
||||
/*
 * Time 'iterations' calls of @fn over a fresh @len-byte buffer with
 * gettimeofday() and return the throughput in bytes per second.
 *
 * With @prefault set, the buffer is written once before the clock starts.
 * The byte count is scaled by 'iterations' because the timed interval covers
 * every pass of the loop, not just one.
 */
static double do_memset_gettimeofday(memset_t fn, size_t len, bool prefault)
{
	struct timeval tv_start, tv_end, tv_diff;
	void *dst = NULL;
	int i;

	alloc_mem(&dst, len);

	/* Untimed warm-up pass over the whole buffer. */
	if (prefault)
		fn(dst, -1, len);

	BUG_ON(gettimeofday(&tv_start, NULL));
	for (i = 0; i < iterations; ++i)
		fn(dst, i, len);
	BUG_ON(gettimeofday(&tv_end, NULL));

	timersub(&tv_end, &tv_start, &tv_diff);

	free(dst);
	/* len bytes were written 'iterations' times during tv_diff. */
	return ((double)len * iterations) / timeval2double(&tv_diff);
}
||||
|
||||
#define pf (no_prefault ? 0 : 1) |
||||
|
||||
/*
 * Print the bytes-per-second value @x scaled to B, KB, MB or GB per second,
 * using K as the step between units. @x is evaluated more than once, so it
 * must not have side effects.
 */
#define print_bps(x) do {					\
		if ((x) < K)					\
			printf(" %14lf B/Sec", (x));		\
		else if ((x) < K * K)				\
			printf(" %14lf KB/Sec", (x) / K);	\
		else if ((x) < K * K * K)			\
			printf(" %14lf MB/Sec", (x) / K / K);	\
		else						\
			printf(" %14lf GB/Sec", (x) / K / K / K); \
	} while (0)
||||
|
||||
int bench_mem_memset(int argc, const char **argv, |
||||
const char *prefix __maybe_unused) |
||||
{ |
||||
int i; |
||||
size_t len; |
||||
double result_bps[2]; |
||||
u64 result_cycle[2]; |
||||
|
||||
argc = parse_options(argc, argv, options, |
||||
bench_mem_memset_usage, 0); |
||||
|
||||
if (no_prefault && only_prefault) { |
||||
fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n"); |
||||
return 1; |
||||
} |
||||
|
||||
if (use_cycle) |
||||
init_cycle(); |
||||
|
||||
len = (size_t)perf_atoll((char *)length_str); |
||||
|
||||
result_cycle[0] = result_cycle[1] = 0ULL; |
||||
result_bps[0] = result_bps[1] = 0.0; |
||||
|
||||
if ((s64)len <= 0) { |
||||
fprintf(stderr, "Invalid length:%s\n", length_str); |
||||
return 1; |
||||
} |
||||
|
||||
/* same to without specifying either of prefault and no-prefault */ |
||||
if (only_prefault && no_prefault) |
||||
only_prefault = no_prefault = false; |
||||
|
||||
for (i = 0; routines[i].name; i++) { |
||||
if (!strcmp(routines[i].name, routine)) |
||||
break; |
||||
} |
||||
if (!routines[i].name) { |
||||
printf("Unknown routine:%s\n", routine); |
||||
printf("Available routines...\n"); |
||||
for (i = 0; routines[i].name; i++) { |
||||
printf("\t%s ... %s\n", |
||||
routines[i].name, routines[i].desc); |
||||
} |
||||
return 1; |
||||
} |
||||
|
||||
if (bench_format == BENCH_FORMAT_DEFAULT) |
||||
printf("# Copying %s Bytes ...\n\n", length_str); |
||||
|
||||
if (!only_prefault && !no_prefault) { |
||||
/* show both of results */ |
||||
if (use_cycle) { |
||||
result_cycle[0] = |
||||
do_memset_cycle(routines[i].fn, len, false); |
||||
result_cycle[1] = |
||||
do_memset_cycle(routines[i].fn, len, true); |
||||
} else { |
||||
result_bps[0] = |
||||
do_memset_gettimeofday(routines[i].fn, |
||||
len, false); |
||||
result_bps[1] = |
||||
do_memset_gettimeofday(routines[i].fn, |
||||
len, true); |
||||
} |
||||
} else { |
||||
if (use_cycle) { |
||||
result_cycle[pf] = |
||||
do_memset_cycle(routines[i].fn, |
||||
len, only_prefault); |
||||
} else { |
||||
result_bps[pf] = |
||||
do_memset_gettimeofday(routines[i].fn, |
||||
len, only_prefault); |
||||
} |
||||
} |
||||
|
||||
switch (bench_format) { |
||||
case BENCH_FORMAT_DEFAULT: |
||||
if (!only_prefault && !no_prefault) { |
||||
if (use_cycle) { |
||||
printf(" %14lf Cycle/Byte\n", |
||||
(double)result_cycle[0] |
||||
/ (double)len); |
||||
printf(" %14lf Cycle/Byte (with prefault)\n ", |
||||
(double)result_cycle[1] |
||||
/ (double)len); |
||||
} else { |
||||
print_bps(result_bps[0]); |
||||
printf("\n"); |
||||
print_bps(result_bps[1]); |
||||
printf(" (with prefault)\n"); |
||||
} |
||||
} else { |
||||
if (use_cycle) { |
||||
printf(" %14lf Cycle/Byte", |
||||
(double)result_cycle[pf] |
||||
/ (double)len); |
||||
} else |
||||
print_bps(result_bps[pf]); |
||||
|
||||
printf("%s\n", only_prefault ? " (with prefault)" : ""); |
||||
} |
||||
break; |
||||
case BENCH_FORMAT_SIMPLE: |
||||
if (!only_prefault && !no_prefault) { |
||||
if (use_cycle) { |
||||
printf("%lf %lf\n", |
||||
(double)result_cycle[0] / (double)len, |
||||
(double)result_cycle[1] / (double)len); |
||||
} else { |
||||
printf("%lf %lf\n", |
||||
result_bps[0], result_bps[1]); |
||||
} |
||||
} else { |
||||
if (use_cycle) { |
||||
printf("%lf\n", (double)result_cycle[pf] |
||||
/ (double)len); |
||||
} else |
||||
printf("%lf\n", result_bps[pf]); |
||||
} |
||||
break; |
||||
default: |
||||
/* reaching this means there's some disaster: */ |
||||
die("unknown format: %d\n", bench_format); |
||||
break; |
||||
} |
||||
|
||||
return 0; |
||||
} |
@ -1,162 +0,0 @@ |
||||
#ifndef _PERF_LINUX_BITOPS_H_ |
||||
#define _PERF_LINUX_BITOPS_H_ |
||||
|
||||
#include <linux/kernel.h> |
||||
#include <linux/compiler.h> |
||||
#include <asm/hweight.h> |
||||
|
||||
#ifndef __WORDSIZE |
||||
#define __WORDSIZE (__SIZEOF_LONG__ * 8) |
||||
#endif |
||||
|
||||
#define BITS_PER_LONG __WORDSIZE |
||||
#define BITS_PER_BYTE 8 |
||||
#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) |
||||
#define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64)) |
||||
#define BITS_TO_U32(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u32)) |
||||
#define BITS_TO_BYTES(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE) |
||||
#define BIT_WORD(nr) ((nr) / BITS_PER_LONG) |
||||
#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) |
||||
|
||||
#define for_each_set_bit(bit, addr, size) \ |
||||
for ((bit) = find_first_bit((addr), (size)); \
|
||||
(bit) < (size); \
|
||||
(bit) = find_next_bit((addr), (size), (bit) + 1)) |
||||
|
||||
/* same as for_each_set_bit() but use bit as value to start with */ |
||||
#define for_each_set_bit_from(bit, addr, size) \ |
||||
for ((bit) = find_next_bit((addr), (size), (bit)); \
|
||||
(bit) < (size); \
|
||||
(bit) = find_next_bit((addr), (size), (bit) + 1)) |
||||
|
||||
static inline void set_bit(int nr, unsigned long *addr) |
||||
{ |
||||
addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG); |
||||
} |
||||
|
||||
static inline void clear_bit(int nr, unsigned long *addr) |
||||
{ |
||||
addr[nr / BITS_PER_LONG] &= ~(1UL << (nr % BITS_PER_LONG)); |
||||
} |
||||
|
||||
static __always_inline int test_bit(unsigned int nr, const unsigned long *addr) |
||||
{ |
||||
return ((1UL << (nr % BITS_PER_LONG)) & |
||||
(((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0; |
||||
} |
||||
|
||||
/*
 * hweight of an unsigned long: hweight32() when long is 4 bytes wide,
 * hweight64() otherwise.
 */
static inline unsigned long hweight_long(unsigned long w)
{
	if (sizeof(w) == 4)
		return hweight32(w);

	return hweight64(w);
}
||||
|
||||
#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) |
||||
|
||||
/**
 * __ffs - index of the lowest set bit in a word
 * @word: The word to search
 *
 * The return value is undefined for a zero @word; callers have to rule
 * that case out before calling.
 */
static __always_inline unsigned long __ffs(unsigned long word)
{
	unsigned long index = 0;
	unsigned int span;

	/* Halve the search window each round, starting at half a word. */
#if BITS_PER_LONG == 64
	span = 32;
#else
	span = 16;
#endif
	while (span) {
		const unsigned long low_mask = (1UL << span) - 1;

		/* Nothing set in the low half: skip over it. */
		if (!(word & low_mask)) {
			index += span;
			word >>= span;
		}
		span /= 2;
	}
	return index;
}
||||
|
||||
typedef const unsigned long __attribute__((__may_alias__)) long_alias_t; |
||||
|
||||
/*
|
||||
* Find the first set bit in a memory region. |
||||
*/ |
||||
static inline unsigned long |
||||
find_first_bit(const unsigned long *addr, unsigned long size) |
||||
{ |
||||
long_alias_t *p = (long_alias_t *) addr; |
||||
unsigned long result = 0; |
||||
unsigned long tmp; |
||||
|
||||
while (size & ~(BITS_PER_LONG-1)) { |
||||
if ((tmp = *(p++))) |
||||
goto found; |
||||
result += BITS_PER_LONG; |
||||
size -= BITS_PER_LONG; |
||||
} |
||||
if (!size) |
||||
return result; |
||||
|
||||
tmp = (*p) & (~0UL >> (BITS_PER_LONG - size)); |
||||
if (tmp == 0UL) /* Are any bits set? */ |
||||
return result + size; /* Nope. */ |
||||
found: |
||||
return result + __ffs(tmp); |
||||
} |
||||
|
||||
/*
|
||||
* Find the next set bit in a memory region. |
||||
*/ |
||||
static inline unsigned long |
||||
find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset) |
||||
{ |
||||
const unsigned long *p = addr + BITOP_WORD(offset); |
||||
unsigned long result = offset & ~(BITS_PER_LONG-1); |
||||
unsigned long tmp; |
||||
|
||||
if (offset >= size) |
||||
return size; |
||||
size -= result; |
||||
offset %= BITS_PER_LONG; |
||||
if (offset) { |
||||
tmp = *(p++); |
||||
tmp &= (~0UL << offset); |
||||
if (size < BITS_PER_LONG) |
||||
goto found_first; |
||||
if (tmp) |
||||
goto found_middle; |
||||
size -= BITS_PER_LONG; |
||||
result += BITS_PER_LONG; |
||||
} |
||||
while (size & ~(BITS_PER_LONG-1)) { |
||||
if ((tmp = *(p++))) |
||||
goto found_middle; |
||||
result += BITS_PER_LONG; |
||||
size -= BITS_PER_LONG; |
||||
} |
||||
if (!size) |
||||
return result; |
||||
tmp = *p; |
||||
|
||||
found_first: |
||||
tmp &= (~0UL >> (BITS_PER_LONG - size)); |
||||
if (tmp == 0UL) /* Are any bits set? */ |
||||
return result + size; /* Nope. */ |
||||
found_middle: |
||||
return result + __ffs(tmp); |
||||
} |
||||
|
||||
#endif |
Loading…
Reference in new issue