|
|
|
#ifndef LOAD_OFFSET
|
|
|
|
#define LOAD_OFFSET 0
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef VMLINUX_SYMBOL
|
|
|
|
#define VMLINUX_SYMBOL(_sym_) _sym_
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Align . to a 8 byte boundary equals to maximum function alignment. */
|
|
|
|
#define ALIGN_FUNCTION() . = ALIGN(8)
|
|
|
|
|
|
|
|
/* The actual configuration determine if the init/exit sections
|
|
|
|
* are handled as text/data or they can be discarded (which
|
|
|
|
* often happens at runtime)
|
|
|
|
*/
|
|
|
|
#ifdef CONFIG_HOTPLUG
|
|
|
|
#define DEV_KEEP(sec) *(.dev##sec)
|
|
|
|
#define DEV_DISCARD(sec)
|
|
|
|
#else
|
|
|
|
#define DEV_KEEP(sec)
|
|
|
|
#define DEV_DISCARD(sec) *(.dev##sec)
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef CONFIG_HOTPLUG_CPU
|
|
|
|
#define CPU_KEEP(sec) *(.cpu##sec)
|
|
|
|
#define CPU_DISCARD(sec)
|
|
|
|
#else
|
|
|
|
#define CPU_KEEP(sec)
|
|
|
|
#define CPU_DISCARD(sec) *(.cpu##sec)
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#if defined(CONFIG_MEMORY_HOTPLUG)
|
|
|
|
#define MEM_KEEP(sec) *(.mem##sec)
|
|
|
|
#define MEM_DISCARD(sec)
|
|
|
|
#else
|
|
|
|
#define MEM_KEEP(sec)
|
|
|
|
#define MEM_DISCARD(sec) *(.mem##sec)
|
|
|
|
#endif
|
|
|
|
|
ftrace: create __mcount_loc section
This patch creates a section in the kernel called "__mcount_loc".
This will hold a list of pointers to the mcount relocation for
each call site of mcount.
For example:
objdump -dr init/main.o
[...]
Disassembly of section .text:
0000000000000000 <do_one_initcall>:
0: 55 push %rbp
[...]
000000000000017b <init_post>:
17b: 55 push %rbp
17c: 48 89 e5 mov %rsp,%rbp
17f: 53 push %rbx
180: 48 83 ec 08 sub $0x8,%rsp
184: e8 00 00 00 00 callq 189 <init_post+0xe>
185: R_X86_64_PC32 mcount+0xfffffffffffffffc
[...]
We will add a section to point to each function call.
.section __mcount_loc,"a",@progbits
[...]
.quad .text + 0x185
[...]
The offset to of the mcount call site in init_post is an offset from
the start of the section, and not the start of the function init_post.
The mcount relocation is at the call site 0x185 from the start of the
.text section.
.text + 0x185 == init_post + 0xa
We need a way to add this __mcount_loc section in a way that we do not
lose the relocations after final link. The .text section here will
be attached to all other .text sections after final link and the
offsets will be meaningless. We need to keep track of where these
.text sections are.
To do this, we use the start of the first function in the section.
do_one_initcall. We can make a tmp.s file with this function as a reference
to the start of the .text section.
.section __mcount_loc,"a",@progbits
[...]
.quad do_one_initcall + 0x185
[...]
Then we can compile the tmp.s into a tmp.o
gcc -c tmp.s -o tmp.o
And link it into back into main.o.
ld -r main.o tmp.o -o tmp_main.o
mv tmp_main.o main.o
But we have a problem. What happens if the first function in a section
is not exported, and is a static function. The linker will not let
the tmp.o use it. This case exists in main.o as well.
Disassembly of section .init.text:
0000000000000000 <set_reset_devices>:
0: 55 push %rbp
1: 48 89 e5 mov %rsp,%rbp
4: e8 00 00 00 00 callq 9 <set_reset_devices+0x9>
5: R_X86_64_PC32 mcount+0xfffffffffffffffc
The first function in .init.text is a static function.
00000000000000a8 t __setup_set_reset_devices
000000000000105f t __setup_str_set_reset_devices
0000000000000000 t set_reset_devices
The lowercase 't' means that set_reset_devices is local and is not exported.
If we simply try to link the tmp.o with the set_reset_devices we end
up with two symbols: one local and one global.
.section __mcount_loc,"a",@progbits
.quad set_reset_devices + 0x10
00000000000000a8 t __setup_set_reset_devices
000000000000105f t __setup_str_set_reset_devices
0000000000000000 t set_reset_devices
U set_reset_devices
We still have an undefined reference to set_reset_devices, and if we try
to compile the kernel, we will end up with an undefined reference to
set_reset_devices, or even worst, it could be exported someplace else,
and then we will have a reference to the wrong location.
To handle this case, we make an intermediate step using objcopy.
We convert set_reset_devices into a global exported symbol before linking
it with tmp.o and set it back afterwards.
00000000000000a8 t __setup_set_reset_devices
000000000000105f t __setup_str_set_reset_devices
0000000000000000 T set_reset_devices
00000000000000a8 t __setup_set_reset_devices
000000000000105f t __setup_str_set_reset_devices
0000000000000000 T set_reset_devices
00000000000000a8 t __setup_set_reset_devices
000000000000105f t __setup_str_set_reset_devices
0000000000000000 t set_reset_devices
Now we have a section in main.o called __mcount_loc that we can place
somewhere in the kernel using vmlinux.ld.S and access it to convert
all these locations that call mcount into nops before starting SMP
and thus, eliminating the need to do this with kstop_machine.
Note, A well documented perl script (scripts/recordmcount.pl) is used
to do all this in one location.
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
17 years ago
|
|
|
#ifdef CONFIG_FTRACE_MCOUNT_RECORD
|
|
|
|
#define MCOUNT_REC() VMLINUX_SYMBOL(__start_mcount_loc) = .; \
|
|
|
|
*(__mcount_loc) \
|
|
|
|
VMLINUX_SYMBOL(__stop_mcount_loc) = .;
|
|
|
|
#else
|
|
|
|
#define MCOUNT_REC()
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef CONFIG_TRACE_BRANCH_PROFILING
|
|
|
|
#define LIKELY_PROFILE() VMLINUX_SYMBOL(__start_annotated_branch_profile) = .; \
|
|
|
|
*(_ftrace_annotated_branch) \
|
|
|
|
VMLINUX_SYMBOL(__stop_annotated_branch_profile) = .;
|
|
|
|
#else
|
|
|
|
#define LIKELY_PROFILE()
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef CONFIG_PROFILE_ALL_BRANCHES
|
|
|
|
#define BRANCH_PROFILE() VMLINUX_SYMBOL(__start_branch_profile) = .; \
|
|
|
|
*(_ftrace_branch) \
|
|
|
|
VMLINUX_SYMBOL(__stop_branch_profile) = .;
|
|
|
|
#else
|
|
|
|
#define BRANCH_PROFILE()
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* .data section */
|
|
|
|
#define DATA_DATA \
|
|
|
|
*(.data) \
|
|
|
|
*(.data.init.refok) \
|
|
|
|
*(.ref.data) \
|
|
|
|
DEV_KEEP(init.data) \
|
|
|
|
DEV_KEEP(exit.data) \
|
|
|
|
CPU_KEEP(init.data) \
|
|
|
|
CPU_KEEP(exit.data) \
|
|
|
|
MEM_KEEP(init.data) \
|
|
|
|
MEM_KEEP(exit.data) \
|
|
|
|
. = ALIGN(8); \
|
|
|
|
VMLINUX_SYMBOL(__start___markers) = .; \
|
|
|
|
*(__markers) \
|
tracing: Kernel Tracepoints
Implementation of kernel tracepoints. Inspired from the Linux Kernel
Markers. Allows complete typing verification by declaring both tracing
statement inline functions and probe registration/unregistration static
inline functions within the same macro "DEFINE_TRACE". No format string
is required. See the tracepoint Documentation and Samples patches for
usage examples.
Taken from the documentation patch :
"A tracepoint placed in code provides a hook to call a function (probe)
that you can provide at runtime. A tracepoint can be "on" (a probe is
connected to it) or "off" (no probe is attached). When a tracepoint is
"off" it has no effect, except for adding a tiny time penalty (checking
a condition for a branch) and space penalty (adding a few bytes for the
function call at the end of the instrumented function and adds a data
structure in a separate section). When a tracepoint is "on", the
function you provide is called each time the tracepoint is executed, in
the execution context of the caller. When the function provided ends its
execution, it returns to the caller (continuing from the tracepoint
site).
You can put tracepoints at important locations in the code. They are
lightweight hooks that can pass an arbitrary number of parameters, which
prototypes are described in a tracepoint declaration placed in a header
file."
Addition and removal of tracepoints is synchronized by RCU using the
scheduler (and preempt_disable) as guarantees to find a quiescent state
(this is really RCU "classic"). The update side uses rcu_barrier_sched()
with call_rcu_sched() and the read/execute side uses
"preempt_disable()/preempt_enable()".
We make sure the previous array containing probes, which has been
scheduled for deletion by the rcu callback, is indeed freed before we
proceed to the next update. It therefore limits the rate of modification
of a single tracepoint to one update per RCU period. The objective here
is to permit fast batch add/removal of probes on _different_
tracepoints.
Changelog :
- Use #name ":" #proto as string to identify the tracepoint in the
tracepoint table. This will make sure not type mismatch happens due to
connexion of a probe with the wrong type to a tracepoint declared with
the same name in a different header.
- Add tracepoint_entry_free_old.
- Change __TO_TRACE to get rid of the 'i' iterator.
Masami Hiramatsu <mhiramat@redhat.com> :
Tested on x86-64.
Performance impact of a tracepoint : same as markers, except that it
adds about 70 bytes of instructions in an unlikely branch of each
instrumented function (the for loop, the stack setup and the function
call). It currently adds a memory read, a test and a conditional branch
at the instrumentation site (in the hot path). Immediate values will
eventually change this into a load immediate, test and branch, which
removes the memory read which will make the i-cache impact smaller
(changing the memory read for a load immediate removes 3-4 bytes per
site on x86_32 (depending on mov prefixes), or 7-8 bytes on x86_64, it
also saves the d-cache hit).
About the performance impact of tracepoints (which is comparable to
markers), even without immediate values optimizations, tests done by
Hideo Aoki on ia64 show no regression. His test case was using hackbench
on a kernel where scheduler instrumentation (about 5 events in code
scheduler code) was added.
Quoting Hideo Aoki about Markers :
I evaluated overhead of kernel marker using linux-2.6-sched-fixes git
tree, which includes several markers for LTTng, using an ia64 server.
While the immediate trace mark feature isn't implemented on ia64, there
is no major performance regression. So, I think that we don't have any
issues to propose merging marker point patches into Linus's tree from
the viewpoint of performance impact.
I prepared two kernels to evaluate. The first one was compiled without
CONFIG_MARKERS. The second one was enabled CONFIG_MARKERS.
I downloaded the original hackbench from the following URL:
http://devresources.linux-foundation.org/craiger/hackbench/src/hackbench.c
I ran hackbench 5 times in each condition and calculated the average and
difference between the kernels.
The parameter of hackbench: every 50 from 50 to 800
The number of CPUs of the server: 2, 4, and 8
Below is the results. As you can see, major performance regression
wasn't found in any case. Even if number of processes increases,
differences between marker-enabled kernel and marker- disabled kernel
doesn't increase. Moreover, if number of CPUs increases, the differences
doesn't increase either.
Curiously, marker-enabled kernel is better than marker-disabled kernel
in more than half cases, although I guess it comes from the difference
of memory access pattern.
* 2 CPUs
Number of | without | with | diff | diff |
processes | Marker [Sec] | Marker [Sec] | [Sec] | [%] |
--------------------------------------------------------------
50 | 4.811 | 4.872 | +0.061 | +1.27 |
100 | 9.854 | 10.309 | +0.454 | +4.61 |
150 | 15.602 | 15.040 | -0.562 | -3.6 |
200 | 20.489 | 20.380 | -0.109 | -0.53 |
250 | 25.798 | 25.652 | -0.146 | -0.56 |
300 | 31.260 | 30.797 | -0.463 | -1.48 |
350 | 36.121 | 35.770 | -0.351 | -0.97 |
400 | 42.288 | 42.102 | -0.186 | -0.44 |
450 | 47.778 | 47.253 | -0.526 | -1.1 |
500 | 51.953 | 52.278 | +0.325 | +0.63 |
550 | 58.401 | 57.700 | -0.701 | -1.2 |
600 | 63.334 | 63.222 | -0.112 | -0.18 |
650 | 68.816 | 68.511 | -0.306 | -0.44 |
700 | 74.667 | 74.088 | -0.579 | -0.78 |
750 | 78.612 | 79.582 | +0.970 | +1.23 |
800 | 85.431 | 85.263 | -0.168 | -0.2 |
--------------------------------------------------------------
* 4 CPUs
Number of | without | with | diff | diff |
processes | Marker [Sec] | Marker [Sec] | [Sec] | [%] |
--------------------------------------------------------------
50 | 2.586 | 2.584 | -0.003 | -0.1 |
100 | 5.254 | 5.283 | +0.030 | +0.56 |
150 | 8.012 | 8.074 | +0.061 | +0.76 |
200 | 11.172 | 11.000 | -0.172 | -1.54 |
250 | 13.917 | 14.036 | +0.119 | +0.86 |
300 | 16.905 | 16.543 | -0.362 | -2.14 |
350 | 19.901 | 20.036 | +0.135 | +0.68 |
400 | 22.908 | 23.094 | +0.186 | +0.81 |
450 | 26.273 | 26.101 | -0.172 | -0.66 |
500 | 29.554 | 29.092 | -0.461 | -1.56 |
550 | 32.377 | 32.274 | -0.103 | -0.32 |
600 | 35.855 | 35.322 | -0.533 | -1.49 |
650 | 39.192 | 38.388 | -0.804 | -2.05 |
700 | 41.744 | 41.719 | -0.025 | -0.06 |
750 | 45.016 | 44.496 | -0.520 | -1.16 |
800 | 48.212 | 47.603 | -0.609 | -1.26 |
--------------------------------------------------------------
* 8 CPUs
Number of | without | with | diff | diff |
processes | Marker [Sec] | Marker [Sec] | [Sec] | [%] |
--------------------------------------------------------------
50 | 2.094 | 2.072 | -0.022 | -1.07 |
100 | 4.162 | 4.273 | +0.111 | +2.66 |
150 | 6.485 | 6.540 | +0.055 | +0.84 |
200 | 8.556 | 8.478 | -0.078 | -0.91 |
250 | 10.458 | 10.258 | -0.200 | -1.91 |
300 | 12.425 | 12.750 | +0.325 | +2.62 |
350 | 14.807 | 14.839 | +0.032 | +0.22 |
400 | 16.801 | 16.959 | +0.158 | +0.94 |
450 | 19.478 | 19.009 | -0.470 | -2.41 |
500 | 21.296 | 21.504 | +0.208 | +0.98 |
550 | 23.842 | 23.979 | +0.137 | +0.57 |
600 | 26.309 | 26.111 | -0.198 | -0.75 |
650 | 28.705 | 28.446 | -0.259 | -0.9 |
700 | 31.233 | 31.394 | +0.161 | +0.52 |
750 | 34.064 | 33.720 | -0.344 | -1.01 |
800 | 36.320 | 36.114 | -0.206 | -0.57 |
--------------------------------------------------------------
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Acked-by: Masami Hiramatsu <mhiramat@redhat.com>
Acked-by: 'Peter Zijlstra' <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
17 years ago
|
|
|
VMLINUX_SYMBOL(__stop___markers) = .; \
|
|
|
|
. = ALIGN(32); \
|
tracing: Kernel Tracepoints
Implementation of kernel tracepoints. Inspired from the Linux Kernel
Markers. Allows complete typing verification by declaring both tracing
statement inline functions and probe registration/unregistration static
inline functions within the same macro "DEFINE_TRACE". No format string
is required. See the tracepoint Documentation and Samples patches for
usage examples.
Taken from the documentation patch :
"A tracepoint placed in code provides a hook to call a function (probe)
that you can provide at runtime. A tracepoint can be "on" (a probe is
connected to it) or "off" (no probe is attached). When a tracepoint is
"off" it has no effect, except for adding a tiny time penalty (checking
a condition for a branch) and space penalty (adding a few bytes for the
function call at the end of the instrumented function and adds a data
structure in a separate section). When a tracepoint is "on", the
function you provide is called each time the tracepoint is executed, in
the execution context of the caller. When the function provided ends its
execution, it returns to the caller (continuing from the tracepoint
site).
You can put tracepoints at important locations in the code. They are
lightweight hooks that can pass an arbitrary number of parameters, which
prototypes are described in a tracepoint declaration placed in a header
file."
Addition and removal of tracepoints is synchronized by RCU using the
scheduler (and preempt_disable) as guarantees to find a quiescent state
(this is really RCU "classic"). The update side uses rcu_barrier_sched()
with call_rcu_sched() and the read/execute side uses
"preempt_disable()/preempt_enable()".
We make sure the previous array containing probes, which has been
scheduled for deletion by the rcu callback, is indeed freed before we
proceed to the next update. It therefore limits the rate of modification
of a single tracepoint to one update per RCU period. The objective here
is to permit fast batch add/removal of probes on _different_
tracepoints.
Changelog :
- Use #name ":" #proto as string to identify the tracepoint in the
tracepoint table. This will make sure not type mismatch happens due to
connexion of a probe with the wrong type to a tracepoint declared with
the same name in a different header.
- Add tracepoint_entry_free_old.
- Change __TO_TRACE to get rid of the 'i' iterator.
Masami Hiramatsu <mhiramat@redhat.com> :
Tested on x86-64.
Performance impact of a tracepoint : same as markers, except that it
adds about 70 bytes of instructions in an unlikely branch of each
instrumented function (the for loop, the stack setup and the function
call). It currently adds a memory read, a test and a conditional branch
at the instrumentation site (in the hot path). Immediate values will
eventually change this into a load immediate, test and branch, which
removes the memory read which will make the i-cache impact smaller
(changing the memory read for a load immediate removes 3-4 bytes per
site on x86_32 (depending on mov prefixes), or 7-8 bytes on x86_64, it
also saves the d-cache hit).
About the performance impact of tracepoints (which is comparable to
markers), even without immediate values optimizations, tests done by
Hideo Aoki on ia64 show no regression. His test case was using hackbench
on a kernel where scheduler instrumentation (about 5 events in code
scheduler code) was added.
Quoting Hideo Aoki about Markers :
I evaluated overhead of kernel marker using linux-2.6-sched-fixes git
tree, which includes several markers for LTTng, using an ia64 server.
While the immediate trace mark feature isn't implemented on ia64, there
is no major performance regression. So, I think that we don't have any
issues to propose merging marker point patches into Linus's tree from
the viewpoint of performance impact.
I prepared two kernels to evaluate. The first one was compiled without
CONFIG_MARKERS. The second one was enabled CONFIG_MARKERS.
I downloaded the original hackbench from the following URL:
http://devresources.linux-foundation.org/craiger/hackbench/src/hackbench.c
I ran hackbench 5 times in each condition and calculated the average and
difference between the kernels.
The parameter of hackbench: every 50 from 50 to 800
The number of CPUs of the server: 2, 4, and 8
Below is the results. As you can see, major performance regression
wasn't found in any case. Even if number of processes increases,
differences between marker-enabled kernel and marker- disabled kernel
doesn't increase. Moreover, if number of CPUs increases, the differences
doesn't increase either.
Curiously, marker-enabled kernel is better than marker-disabled kernel
in more than half cases, although I guess it comes from the difference
of memory access pattern.
* 2 CPUs
Number of | without | with | diff | diff |
processes | Marker [Sec] | Marker [Sec] | [Sec] | [%] |
--------------------------------------------------------------
50 | 4.811 | 4.872 | +0.061 | +1.27 |
100 | 9.854 | 10.309 | +0.454 | +4.61 |
150 | 15.602 | 15.040 | -0.562 | -3.6 |
200 | 20.489 | 20.380 | -0.109 | -0.53 |
250 | 25.798 | 25.652 | -0.146 | -0.56 |
300 | 31.260 | 30.797 | -0.463 | -1.48 |
350 | 36.121 | 35.770 | -0.351 | -0.97 |
400 | 42.288 | 42.102 | -0.186 | -0.44 |
450 | 47.778 | 47.253 | -0.526 | -1.1 |
500 | 51.953 | 52.278 | +0.325 | +0.63 |
550 | 58.401 | 57.700 | -0.701 | -1.2 |
600 | 63.334 | 63.222 | -0.112 | -0.18 |
650 | 68.816 | 68.511 | -0.306 | -0.44 |
700 | 74.667 | 74.088 | -0.579 | -0.78 |
750 | 78.612 | 79.582 | +0.970 | +1.23 |
800 | 85.431 | 85.263 | -0.168 | -0.2 |
--------------------------------------------------------------
* 4 CPUs
Number of | without | with | diff | diff |
processes | Marker [Sec] | Marker [Sec] | [Sec] | [%] |
--------------------------------------------------------------
50 | 2.586 | 2.584 | -0.003 | -0.1 |
100 | 5.254 | 5.283 | +0.030 | +0.56 |
150 | 8.012 | 8.074 | +0.061 | +0.76 |
200 | 11.172 | 11.000 | -0.172 | -1.54 |
250 | 13.917 | 14.036 | +0.119 | +0.86 |
300 | 16.905 | 16.543 | -0.362 | -2.14 |
350 | 19.901 | 20.036 | +0.135 | +0.68 |
400 | 22.908 | 23.094 | +0.186 | +0.81 |
450 | 26.273 | 26.101 | -0.172 | -0.66 |
500 | 29.554 | 29.092 | -0.461 | -1.56 |
550 | 32.377 | 32.274 | -0.103 | -0.32 |
600 | 35.855 | 35.322 | -0.533 | -1.49 |
650 | 39.192 | 38.388 | -0.804 | -2.05 |
700 | 41.744 | 41.719 | -0.025 | -0.06 |
750 | 45.016 | 44.496 | -0.520 | -1.16 |
800 | 48.212 | 47.603 | -0.609 | -1.26 |
--------------------------------------------------------------
* 8 CPUs
Number of | without | with | diff | diff |
processes | Marker [Sec] | Marker [Sec] | [Sec] | [%] |
--------------------------------------------------------------
50 | 2.094 | 2.072 | -0.022 | -1.07 |
100 | 4.162 | 4.273 | +0.111 | +2.66 |
150 | 6.485 | 6.540 | +0.055 | +0.84 |
200 | 8.556 | 8.478 | -0.078 | -0.91 |
250 | 10.458 | 10.258 | -0.200 | -1.91 |
300 | 12.425 | 12.750 | +0.325 | +2.62 |
350 | 14.807 | 14.839 | +0.032 | +0.22 |
400 | 16.801 | 16.959 | +0.158 | +0.94 |
450 | 19.478 | 19.009 | -0.470 | -2.41 |
500 | 21.296 | 21.504 | +0.208 | +0.98 |
550 | 23.842 | 23.979 | +0.137 | +0.57 |
600 | 26.309 | 26.111 | -0.198 | -0.75 |
650 | 28.705 | 28.446 | -0.259 | -0.9 |
700 | 31.233 | 31.394 | +0.161 | +0.52 |
750 | 34.064 | 33.720 | -0.344 | -1.01 |
800 | 36.320 | 36.114 | -0.206 | -0.57 |
--------------------------------------------------------------
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Acked-by: Masami Hiramatsu <mhiramat@redhat.com>
Acked-by: 'Peter Zijlstra' <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
17 years ago
|
|
|
VMLINUX_SYMBOL(__start___tracepoints) = .; \
|
|
|
|
*(__tracepoints) \
|
|
|
|
VMLINUX_SYMBOL(__stop___tracepoints) = .; \
|
|
|
|
LIKELY_PROFILE() \
|
|
|
|
BRANCH_PROFILE()
|
|
|
|
|
|
|
|
#define RO_DATA(align) \
|
|
|
|
. = ALIGN((align)); \
|
|
|
|
.rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \
|
|
|
|
VMLINUX_SYMBOL(__start_rodata) = .; \
|
|
|
|
*(.rodata) *(.rodata.*) \
|
|
|
|
*(__vermagic) /* Kernel version magic */ \
|
|
|
|
*(__markers_strings) /* Markers: strings */ \
|
tracing: Kernel Tracepoints
Implementation of kernel tracepoints. Inspired from the Linux Kernel
Markers. Allows complete typing verification by declaring both tracing
statement inline functions and probe registration/unregistration static
inline functions within the same macro "DEFINE_TRACE". No format string
is required. See the tracepoint Documentation and Samples patches for
usage examples.
Taken from the documentation patch :
"A tracepoint placed in code provides a hook to call a function (probe)
that you can provide at runtime. A tracepoint can be "on" (a probe is
connected to it) or "off" (no probe is attached). When a tracepoint is
"off" it has no effect, except for adding a tiny time penalty (checking
a condition for a branch) and space penalty (adding a few bytes for the
function call at the end of the instrumented function and adds a data
structure in a separate section). When a tracepoint is "on", the
function you provide is called each time the tracepoint is executed, in
the execution context of the caller. When the function provided ends its
execution, it returns to the caller (continuing from the tracepoint
site).
You can put tracepoints at important locations in the code. They are
lightweight hooks that can pass an arbitrary number of parameters, which
prototypes are described in a tracepoint declaration placed in a header
file."
Addition and removal of tracepoints is synchronized by RCU using the
scheduler (and preempt_disable) as guarantees to find a quiescent state
(this is really RCU "classic"). The update side uses rcu_barrier_sched()
with call_rcu_sched() and the read/execute side uses
"preempt_disable()/preempt_enable()".
We make sure the previous array containing probes, which has been
scheduled for deletion by the rcu callback, is indeed freed before we
proceed to the next update. It therefore limits the rate of modification
of a single tracepoint to one update per RCU period. The objective here
is to permit fast batch add/removal of probes on _different_
tracepoints.
Changelog :
- Use #name ":" #proto as string to identify the tracepoint in the
tracepoint table. This will make sure not type mismatch happens due to
connexion of a probe with the wrong type to a tracepoint declared with
the same name in a different header.
- Add tracepoint_entry_free_old.
- Change __TO_TRACE to get rid of the 'i' iterator.
Masami Hiramatsu <mhiramat@redhat.com> :
Tested on x86-64.
Performance impact of a tracepoint : same as markers, except that it
adds about 70 bytes of instructions in an unlikely branch of each
instrumented function (the for loop, the stack setup and the function
call). It currently adds a memory read, a test and a conditional branch
at the instrumentation site (in the hot path). Immediate values will
eventually change this into a load immediate, test and branch, which
removes the memory read which will make the i-cache impact smaller
(changing the memory read for a load immediate removes 3-4 bytes per
site on x86_32 (depending on mov prefixes), or 7-8 bytes on x86_64, it
also saves the d-cache hit).
About the performance impact of tracepoints (which is comparable to
markers), even without immediate values optimizations, tests done by
Hideo Aoki on ia64 show no regression. His test case was using hackbench
on a kernel where scheduler instrumentation (about 5 events in code
scheduler code) was added.
Quoting Hideo Aoki about Markers :
I evaluated overhead of kernel marker using linux-2.6-sched-fixes git
tree, which includes several markers for LTTng, using an ia64 server.
While the immediate trace mark feature isn't implemented on ia64, there
is no major performance regression. So, I think that we don't have any
issues to propose merging marker point patches into Linus's tree from
the viewpoint of performance impact.
I prepared two kernels to evaluate. The first one was compiled without
CONFIG_MARKERS. The second one was enabled CONFIG_MARKERS.
I downloaded the original hackbench from the following URL:
http://devresources.linux-foundation.org/craiger/hackbench/src/hackbench.c
I ran hackbench 5 times in each condition and calculated the average and
difference between the kernels.
The parameter of hackbench: every 50 from 50 to 800
The number of CPUs of the server: 2, 4, and 8
Below is the results. As you can see, major performance regression
wasn't found in any case. Even if number of processes increases,
differences between marker-enabled kernel and marker- disabled kernel
doesn't increase. Moreover, if number of CPUs increases, the differences
doesn't increase either.
Curiously, marker-enabled kernel is better than marker-disabled kernel
in more than half cases, although I guess it comes from the difference
of memory access pattern.
* 2 CPUs
Number of | without | with | diff | diff |
processes | Marker [Sec] | Marker [Sec] | [Sec] | [%] |
--------------------------------------------------------------
50 | 4.811 | 4.872 | +0.061 | +1.27 |
100 | 9.854 | 10.309 | +0.454 | +4.61 |
150 | 15.602 | 15.040 | -0.562 | -3.6 |
200 | 20.489 | 20.380 | -0.109 | -0.53 |
250 | 25.798 | 25.652 | -0.146 | -0.56 |
300 | 31.260 | 30.797 | -0.463 | -1.48 |
350 | 36.121 | 35.770 | -0.351 | -0.97 |
400 | 42.288 | 42.102 | -0.186 | -0.44 |
450 | 47.778 | 47.253 | -0.526 | -1.1 |
500 | 51.953 | 52.278 | +0.325 | +0.63 |
550 | 58.401 | 57.700 | -0.701 | -1.2 |
600 | 63.334 | 63.222 | -0.112 | -0.18 |
650 | 68.816 | 68.511 | -0.306 | -0.44 |
700 | 74.667 | 74.088 | -0.579 | -0.78 |
750 | 78.612 | 79.582 | +0.970 | +1.23 |
800 | 85.431 | 85.263 | -0.168 | -0.2 |
--------------------------------------------------------------
* 4 CPUs
Number of | without | with | diff | diff |
processes | Marker [Sec] | Marker [Sec] | [Sec] | [%] |
--------------------------------------------------------------
50 | 2.586 | 2.584 | -0.003 | -0.1 |
100 | 5.254 | 5.283 | +0.030 | +0.56 |
150 | 8.012 | 8.074 | +0.061 | +0.76 |
200 | 11.172 | 11.000 | -0.172 | -1.54 |
250 | 13.917 | 14.036 | +0.119 | +0.86 |
300 | 16.905 | 16.543 | -0.362 | -2.14 |
350 | 19.901 | 20.036 | +0.135 | +0.68 |
400 | 22.908 | 23.094 | +0.186 | +0.81 |
450 | 26.273 | 26.101 | -0.172 | -0.66 |
500 | 29.554 | 29.092 | -0.461 | -1.56 |
550 | 32.377 | 32.274 | -0.103 | -0.32 |
600 | 35.855 | 35.322 | -0.533 | -1.49 |
650 | 39.192 | 38.388 | -0.804 | -2.05 |
700 | 41.744 | 41.719 | -0.025 | -0.06 |
750 | 45.016 | 44.496 | -0.520 | -1.16 |
800 | 48.212 | 47.603 | -0.609 | -1.26 |
--------------------------------------------------------------
* 8 CPUs
Number of | without | with | diff | diff |
processes | Marker [Sec] | Marker [Sec] | [Sec] | [%] |
--------------------------------------------------------------
50 | 2.094 | 2.072 | -0.022 | -1.07 |
100 | 4.162 | 4.273 | +0.111 | +2.66 |
150 | 6.485 | 6.540 | +0.055 | +0.84 |
200 | 8.556 | 8.478 | -0.078 | -0.91 |
250 | 10.458 | 10.258 | -0.200 | -1.91 |
300 | 12.425 | 12.750 | +0.325 | +2.62 |
350 | 14.807 | 14.839 | +0.032 | +0.22 |
400 | 16.801 | 16.959 | +0.158 | +0.94 |
450 | 19.478 | 19.009 | -0.470 | -2.41 |
500 | 21.296 | 21.504 | +0.208 | +0.98 |
550 | 23.842 | 23.979 | +0.137 | +0.57 |
600 | 26.309 | 26.111 | -0.198 | -0.75 |
650 | 28.705 | 28.446 | -0.259 | -0.9 |
700 | 31.233 | 31.394 | +0.161 | +0.52 |
750 | 34.064 | 33.720 | -0.344 | -1.01 |
800 | 36.320 | 36.114 | -0.206 | -0.57 |
--------------------------------------------------------------
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Acked-by: Masami Hiramatsu <mhiramat@redhat.com>
Acked-by: 'Peter Zijlstra' <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
17 years ago
|
|
|
*(__tracepoints_strings)/* Tracepoints: strings */ \
|
|
|
|
} \
|
|
|
|
\
|
|
|
|
.rodata1 : AT(ADDR(.rodata1) - LOAD_OFFSET) { \
|
|
|
|
*(.rodata1) \
|
|
|
|
} \
|
|
|
|
\
|
|
|
|
BUG_TABLE \
|
|
|
|
\
|
|
|
|
/* PCI quirks */ \
|
|
|
|
.pci_fixup : AT(ADDR(.pci_fixup) - LOAD_OFFSET) { \
|
|
|
|
VMLINUX_SYMBOL(__start_pci_fixups_early) = .; \
|
|
|
|
*(.pci_fixup_early) \
|
|
|
|
VMLINUX_SYMBOL(__end_pci_fixups_early) = .; \
|
|
|
|
VMLINUX_SYMBOL(__start_pci_fixups_header) = .; \
|
|
|
|
*(.pci_fixup_header) \
|
|
|
|
VMLINUX_SYMBOL(__end_pci_fixups_header) = .; \
|
|
|
|
VMLINUX_SYMBOL(__start_pci_fixups_final) = .; \
|
|
|
|
*(.pci_fixup_final) \
|
|
|
|
VMLINUX_SYMBOL(__end_pci_fixups_final) = .; \
|
|
|
|
VMLINUX_SYMBOL(__start_pci_fixups_enable) = .; \
|
|
|
|
*(.pci_fixup_enable) \
|
|
|
|
VMLINUX_SYMBOL(__end_pci_fixups_enable) = .; \
|
|
|
|
VMLINUX_SYMBOL(__start_pci_fixups_resume) = .; \
|
|
|
|
*(.pci_fixup_resume) \
|
|
|
|
VMLINUX_SYMBOL(__end_pci_fixups_resume) = .; \
|
|
|
|
VMLINUX_SYMBOL(__start_pci_fixups_resume_early) = .; \
|
|
|
|
*(.pci_fixup_resume_early) \
|
|
|
|
VMLINUX_SYMBOL(__end_pci_fixups_resume_early) = .; \
|
|
|
|
VMLINUX_SYMBOL(__start_pci_fixups_suspend) = .; \
|
|
|
|
*(.pci_fixup_suspend) \
|
|
|
|
VMLINUX_SYMBOL(__end_pci_fixups_suspend) = .; \
|
|
|
|
} \
|
|
|
|
\
|
|
|
|
/* Built-in firmware blobs */ \
|
|
|
|
.builtin_fw : AT(ADDR(.builtin_fw) - LOAD_OFFSET) { \
|
|
|
|
VMLINUX_SYMBOL(__start_builtin_fw) = .; \
|
|
|
|
*(.builtin_fw) \
|
|
|
|
VMLINUX_SYMBOL(__end_builtin_fw) = .; \
|
|
|
|
} \
|
|
|
|
\
|
|
|
|
/* RapidIO route ops */ \
|
|
|
|
.rio_route : AT(ADDR(.rio_route) - LOAD_OFFSET) { \
|
|
|
|
VMLINUX_SYMBOL(__start_rio_route_ops) = .; \
|
|
|
|
*(.rio_route_ops) \
|
|
|
|
VMLINUX_SYMBOL(__end_rio_route_ops) = .; \
|
|
|
|
} \
|
|
|
|
\
|
|
|
|
TRACEDATA \
|
|
|
|
\
|
|
|
|
/* Kernel symbol table: Normal symbols */ \
|
|
|
|
__ksymtab : AT(ADDR(__ksymtab) - LOAD_OFFSET) { \
|
|
|
|
VMLINUX_SYMBOL(__start___ksymtab) = .; \
|
|
|
|
*(__ksymtab) \
|
|
|
|
VMLINUX_SYMBOL(__stop___ksymtab) = .; \
|
|
|
|
} \
|
|
|
|
\
|
|
|
|
/* Kernel symbol table: GPL-only symbols */ \
|
|
|
|
__ksymtab_gpl : AT(ADDR(__ksymtab_gpl) - LOAD_OFFSET) { \
|
|
|
|
VMLINUX_SYMBOL(__start___ksymtab_gpl) = .; \
|
|
|
|
*(__ksymtab_gpl) \
|
|
|
|
VMLINUX_SYMBOL(__stop___ksymtab_gpl) = .; \
|
|
|
|
} \
|
|
|
|
\
|
|
|
|
/* Kernel symbol table: Normal unused symbols */ \
|
|
|
|
__ksymtab_unused : AT(ADDR(__ksymtab_unused) - LOAD_OFFSET) { \
|
|
|
|
VMLINUX_SYMBOL(__start___ksymtab_unused) = .; \
|
|
|
|
*(__ksymtab_unused) \
|
|
|
|
VMLINUX_SYMBOL(__stop___ksymtab_unused) = .; \
|
|
|
|
} \
|
|
|
|
\
|
|
|
|
/* Kernel symbol table: GPL-only unused symbols */ \
|
|
|
|
__ksymtab_unused_gpl : AT(ADDR(__ksymtab_unused_gpl) - LOAD_OFFSET) { \
|
|
|
|
VMLINUX_SYMBOL(__start___ksymtab_unused_gpl) = .; \
|
|
|
|
*(__ksymtab_unused_gpl) \
|
|
|
|
VMLINUX_SYMBOL(__stop___ksymtab_unused_gpl) = .; \
|
|
|
|
} \
|
|
|
|
\
|
|
|
|
/* Kernel symbol table: GPL-future-only symbols */ \
|
|
|
|
__ksymtab_gpl_future : AT(ADDR(__ksymtab_gpl_future) - LOAD_OFFSET) { \
|
|
|
|
VMLINUX_SYMBOL(__start___ksymtab_gpl_future) = .; \
|
|
|
|
*(__ksymtab_gpl_future) \
|
|
|
|
VMLINUX_SYMBOL(__stop___ksymtab_gpl_future) = .; \
|
|
|
|
} \
|
|
|
|
\
|
|
|
|
/* Kernel symbol table: Normal symbols */ \
|
|
|
|
__kcrctab : AT(ADDR(__kcrctab) - LOAD_OFFSET) { \
|
|
|
|
VMLINUX_SYMBOL(__start___kcrctab) = .; \
|
|
|
|
*(__kcrctab) \
|
|
|
|
VMLINUX_SYMBOL(__stop___kcrctab) = .; \
|
|
|
|
} \
|
|
|
|
\
|
|
|
|
/* Kernel symbol table: GPL-only symbols */ \
|
|
|
|
__kcrctab_gpl : AT(ADDR(__kcrctab_gpl) - LOAD_OFFSET) { \
|
|
|
|
VMLINUX_SYMBOL(__start___kcrctab_gpl) = .; \
|
|
|
|
*(__kcrctab_gpl) \
|
|
|
|
VMLINUX_SYMBOL(__stop___kcrctab_gpl) = .; \
|
|
|
|
} \
|
|
|
|
\
|
|
|
|
/* Kernel symbol table: Normal unused symbols */ \
|
|
|
|
__kcrctab_unused : AT(ADDR(__kcrctab_unused) - LOAD_OFFSET) { \
|
|
|
|
VMLINUX_SYMBOL(__start___kcrctab_unused) = .; \
|
|
|
|
*(__kcrctab_unused) \
|
|
|
|
VMLINUX_SYMBOL(__stop___kcrctab_unused) = .; \
|
|
|
|
} \
|
|
|
|
\
|
|
|
|
/* Kernel symbol table: GPL-only unused symbols */ \
|
|
|
|
__kcrctab_unused_gpl : AT(ADDR(__kcrctab_unused_gpl) - LOAD_OFFSET) { \
|
|
|
|
VMLINUX_SYMBOL(__start___kcrctab_unused_gpl) = .; \
|
|
|
|
*(__kcrctab_unused_gpl) \
|
|
|
|
VMLINUX_SYMBOL(__stop___kcrctab_unused_gpl) = .; \
|
|
|
|
} \
|
|
|
|
\
|
|
|
|
/* Kernel symbol table: GPL-future-only symbols */ \
|
|
|
|
__kcrctab_gpl_future : AT(ADDR(__kcrctab_gpl_future) - LOAD_OFFSET) { \
|
|
|
|
VMLINUX_SYMBOL(__start___kcrctab_gpl_future) = .; \
|
|
|
|
*(__kcrctab_gpl_future) \
|
|
|
|
VMLINUX_SYMBOL(__stop___kcrctab_gpl_future) = .; \
|
|
|
|
} \
|
|
|
|
\
|
|
|
|
/* Kernel symbol table: strings */ \
|
|
|
|
__ksymtab_strings : AT(ADDR(__ksymtab_strings) - LOAD_OFFSET) { \
|
|
|
|
*(__ksymtab_strings) \
|
|
|
|
} \
|
|
|
|
\
|
|
|
|
/* __*init sections */ \
|
|
|
|
__init_rodata : AT(ADDR(__init_rodata) - LOAD_OFFSET) { \
|
|
|
|
*(.ref.rodata) \
|
ftrace: create __mcount_loc section
This patch creates a section in the kernel called "__mcount_loc".
This will hold a list of pointers to the mcount relocation for
each call site of mcount.
For example:
objdump -dr init/main.o
[...]
Disassembly of section .text:
0000000000000000 <do_one_initcall>:
0: 55 push %rbp
[...]
000000000000017b <init_post>:
17b: 55 push %rbp
17c: 48 89 e5 mov %rsp,%rbp
17f: 53 push %rbx
180: 48 83 ec 08 sub $0x8,%rsp
184: e8 00 00 00 00 callq 189 <init_post+0xe>
185: R_X86_64_PC32 mcount+0xfffffffffffffffc
[...]
We will add a section to point to each function call.
.section __mcount_loc,"a",@progbits
[...]
.quad .text + 0x185
[...]
The offset to of the mcount call site in init_post is an offset from
the start of the section, and not the start of the function init_post.
The mcount relocation is at the call site 0x185 from the start of the
.text section.
.text + 0x185 == init_post + 0xa
We need a way to add this __mcount_loc section in a way that we do not
lose the relocations after final link. The .text section here will
be attached to all other .text sections after final link and the
offsets will be meaningless. We need to keep track of where these
.text sections are.
To do this, we use the start of the first function in the section.
do_one_initcall. We can make a tmp.s file with this function as a reference
to the start of the .text section.
.section __mcount_loc,"a",@progbits
[...]
.quad do_one_initcall + 0x185
[...]
Then we can compile the tmp.s into a tmp.o
gcc -c tmp.s -o tmp.o
And link it into back into main.o.
ld -r main.o tmp.o -o tmp_main.o
mv tmp_main.o main.o
But we have a problem. What happens if the first function in a section
is not exported, and is a static function. The linker will not let
the tmp.o use it. This case exists in main.o as well.
Disassembly of section .init.text:
0000000000000000 <set_reset_devices>:
0: 55 push %rbp
1: 48 89 e5 mov %rsp,%rbp
4: e8 00 00 00 00 callq 9 <set_reset_devices+0x9>
5: R_X86_64_PC32 mcount+0xfffffffffffffffc
The first function in .init.text is a static function.
00000000000000a8 t __setup_set_reset_devices
000000000000105f t __setup_str_set_reset_devices
0000000000000000 t set_reset_devices
The lowercase 't' means that set_reset_devices is local and is not exported.
If we simply try to link the tmp.o with the set_reset_devices we end
up with two symbols: one local and one global.
.section __mcount_loc,"a",@progbits
.quad set_reset_devices + 0x10
00000000000000a8 t __setup_set_reset_devices
000000000000105f t __setup_str_set_reset_devices
0000000000000000 t set_reset_devices
U set_reset_devices
We still have an undefined reference to set_reset_devices, and if we try
to compile the kernel, we will end up with an undefined reference to
set_reset_devices, or even worst, it could be exported someplace else,
and then we will have a reference to the wrong location.
To handle this case, we make an intermediate step using objcopy.
We convert set_reset_devices into a global exported symbol before linking
it with tmp.o and set it back afterwards.
00000000000000a8 t __setup_set_reset_devices
000000000000105f t __setup_str_set_reset_devices
0000000000000000 T set_reset_devices
00000000000000a8 t __setup_set_reset_devices
000000000000105f t __setup_str_set_reset_devices
0000000000000000 T set_reset_devices
00000000000000a8 t __setup_set_reset_devices
000000000000105f t __setup_str_set_reset_devices
0000000000000000 t set_reset_devices
Now we have a section in main.o called __mcount_loc that we can place
somewhere in the kernel using vmlinux.ld.S and access it to convert
all these locations that call mcount into nops before starting SMP
and thus, eliminating the need to do this with kstop_machine.
Note, A well documented perl script (scripts/recordmcount.pl) is used
to do all this in one location.
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
17 years ago
|
|
|
MCOUNT_REC() \
|
|
|
|
DEV_KEEP(init.rodata) \
|
|
|
|
DEV_KEEP(exit.rodata) \
|
|
|
|
CPU_KEEP(init.rodata) \
|
|
|
|
CPU_KEEP(exit.rodata) \
|
|
|
|
MEM_KEEP(init.rodata) \
|
|
|
|
MEM_KEEP(exit.rodata) \
|
|
|
|
} \
|
|
|
|
\
|
|
|
|
/* Built-in module parameters. */ \
|
|
|
|
__param : AT(ADDR(__param) - LOAD_OFFSET) { \
|
|
|
|
VMLINUX_SYMBOL(__start___param) = .; \
|
|
|
|
*(__param) \
|
|
|
|
VMLINUX_SYMBOL(__stop___param) = .; \
|
|
|
|
. = ALIGN((align)); \
|
|
|
|
VMLINUX_SYMBOL(__end_rodata) = .; \
|
|
|
|
} \
|
|
|
|
. = ALIGN((align));
|
|
|
|
|
|
|
|
/* RODATA provided for backward compatibility.
|
|
|
|
* All archs are supposed to use RO_DATA() */
|
|
|
|
#define RODATA RO_DATA(4096)
|
|
|
|
|
|
|
|
#define SECURITY_INIT \
|
|
|
|
.security_initcall.init : AT(ADDR(.security_initcall.init) - LOAD_OFFSET) { \
|
|
|
|
VMLINUX_SYMBOL(__security_initcall_start) = .; \
|
|
|
|
*(.security_initcall.init) \
|
|
|
|
VMLINUX_SYMBOL(__security_initcall_end) = .; \
|
|
|
|
}
|
|
|
|
|
|
|
|
/* .text section. Map to function alignment to avoid address changes
|
|
|
|
* during second ld run in second ld pass when generating System.map */
|
|
|
|
#define TEXT_TEXT \
|
|
|
|
ALIGN_FUNCTION(); \
|
|
|
|
*(.text.hot) \
|
|
|
|
*(.text) \
|
|
|
|
*(.ref.text) \
|
|
|
|
*(.text.init.refok) \
|
|
|
|
*(.exit.text.refok) \
|
|
|
|
DEV_KEEP(init.text) \
|
|
|
|
DEV_KEEP(exit.text) \
|
|
|
|
CPU_KEEP(init.text) \
|
|
|
|
CPU_KEEP(exit.text) \
|
|
|
|
MEM_KEEP(init.text) \
|
|
|
|
MEM_KEEP(exit.text) \
|
|
|
|
*(.text.unlikely)
|
|
|
|
|
|
|
|
|
|
|
|
/* sched.text is aling to function alignment to secure we have same
|
|
|
|
* address even at second ld pass when generating System.map */
|
|
|
|
#define SCHED_TEXT \
|
|
|
|
ALIGN_FUNCTION(); \
|
|
|
|
VMLINUX_SYMBOL(__sched_text_start) = .; \
|
|
|
|
*(.sched.text) \
|
|
|
|
VMLINUX_SYMBOL(__sched_text_end) = .;
|
|
|
|
|
|
|
|
/* spinlock.text is aling to function alignment to secure we have same
|
|
|
|
* address even at second ld pass when generating System.map */
|
|
|
|
#define LOCK_TEXT \
|
|
|
|
ALIGN_FUNCTION(); \
|
|
|
|
VMLINUX_SYMBOL(__lock_text_start) = .; \
|
|
|
|
*(.spinlock.text) \
|
|
|
|
VMLINUX_SYMBOL(__lock_text_end) = .;
|
|
|
|
|
|
|
|
#define KPROBES_TEXT \
|
|
|
|
ALIGN_FUNCTION(); \
|
|
|
|
VMLINUX_SYMBOL(__kprobes_text_start) = .; \
|
|
|
|
*(.kprobes.text) \
|
|
|
|
VMLINUX_SYMBOL(__kprobes_text_end) = .;
|
|
|
|
|
|
|
|
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
|
|
|
#define IRQENTRY_TEXT \
|
|
|
|
ALIGN_FUNCTION(); \
|
|
|
|
VMLINUX_SYMBOL(__irqentry_text_start) = .; \
|
|
|
|
*(.irqentry.text) \
|
|
|
|
VMLINUX_SYMBOL(__irqentry_text_end) = .;
|
|
|
|
#else
|
|
|
|
#define IRQENTRY_TEXT
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Section used for early init (in .S files) */
|
|
|
|
#define HEAD_TEXT *(.head.text)
|
|
|
|
|
|
|
|
/* init and exit section handling */
|
|
|
|
#define INIT_DATA \
|
|
|
|
*(.init.data) \
|
|
|
|
DEV_DISCARD(init.data) \
|
|
|
|
DEV_DISCARD(init.rodata) \
|
|
|
|
CPU_DISCARD(init.data) \
|
|
|
|
CPU_DISCARD(init.rodata) \
|
|
|
|
MEM_DISCARD(init.data) \
|
driver core: basic infrastructure for per-module dynamic debug messages
Base infrastructure to enable per-module debug messages.
I've introduced CONFIG_DYNAMIC_PRINTK_DEBUG, which when enabled centralizes
control of debugging statements on a per-module basis in one /proc file,
currently, <debugfs>/dynamic_printk/modules. When, CONFIG_DYNAMIC_PRINTK_DEBUG,
is not set, debugging statements can still be enabled as before, often by
defining 'DEBUG' for the proper compilation unit. Thus, this patch set has no
affect when CONFIG_DYNAMIC_PRINTK_DEBUG is not set.
The infrastructure currently ties into all pr_debug() and dev_dbg() calls. That
is, if CONFIG_DYNAMIC_PRINTK_DEBUG is set, all pr_debug() and dev_dbg() calls
can be dynamically enabled/disabled on a per-module basis.
Future plans include extending this functionality to subsystems, that define
their own debug levels and flags.
Usage:
Dynamic debugging is controlled by the debugfs file,
<debugfs>/dynamic_printk/modules. This file contains a list of the modules that
can be enabled. The format of the file is as follows:
<module_name> <enabled=0/1>
.
.
.
<module_name> : Name of the module in which the debug call resides
<enabled=0/1> : whether the messages are enabled or not
For example:
snd_hda_intel enabled=0
fixup enabled=1
driver enabled=0
Enable a module:
$echo "set enabled=1 <module_name>" > dynamic_printk/modules
Disable a module:
$echo "set enabled=0 <module_name>" > dynamic_printk/modules
Enable all modules:
$echo "set enabled=1 all" > dynamic_printk/modules
Disable all modules:
$echo "set enabled=0 all" > dynamic_printk/modules
Finally, passing "dynamic_printk" at the command line enables
debugging for all modules. This mode can be turned off via the above
disable command.
[gkh: minor cleanups and tweaks to make the build work quietly]
Signed-off-by: Jason Baron <jbaron@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
17 years ago
|
|
|
MEM_DISCARD(init.rodata) \
|
|
|
|
/* implement dynamic printk debug */ \
|
|
|
|
VMLINUX_SYMBOL(__start___verbose_strings) = .; \
|
|
|
|
*(__verbose_strings) \
|
|
|
|
VMLINUX_SYMBOL(__stop___verbose_strings) = .; \
|
|
|
|
. = ALIGN(8); \
|
|
|
|
VMLINUX_SYMBOL(__start___verbose) = .; \
|
|
|
|
*(__verbose) \
|
|
|
|
VMLINUX_SYMBOL(__stop___verbose) = .;
|
|
|
|
|
|
|
|
#define INIT_TEXT \
|
|
|
|
*(.init.text) \
|
|
|
|
DEV_DISCARD(init.text) \
|
|
|
|
CPU_DISCARD(init.text) \
|
|
|
|
MEM_DISCARD(init.text)
|
|
|
|
|
|
|
|
#define EXIT_DATA \
|
|
|
|
*(.exit.data) \
|
|
|
|
DEV_DISCARD(exit.data) \
|
|
|
|
DEV_DISCARD(exit.rodata) \
|
|
|
|
CPU_DISCARD(exit.data) \
|
|
|
|
CPU_DISCARD(exit.rodata) \
|
|
|
|
MEM_DISCARD(exit.data) \
|
|
|
|
MEM_DISCARD(exit.rodata)
|
|
|
|
|
|
|
|
#define EXIT_TEXT \
|
|
|
|
*(.exit.text) \
|
|
|
|
DEV_DISCARD(exit.text) \
|
|
|
|
CPU_DISCARD(exit.text) \
|
|
|
|
MEM_DISCARD(exit.text)
|
|
|
|
|
|
|
|
/* DWARF debug sections.
|
|
|
|
Symbols in the DWARF debugging sections are relative to
|
|
|
|
the beginning of the section so we begin them at 0. */
|
|
|
|
#define DWARF_DEBUG \
|
|
|
|
/* DWARF 1 */ \
|
|
|
|
.debug 0 : { *(.debug) } \
|
|
|
|
.line 0 : { *(.line) } \
|
|
|
|
/* GNU DWARF 1 extensions */ \
|
|
|
|
.debug_srcinfo 0 : { *(.debug_srcinfo) } \
|
|
|
|
.debug_sfnames 0 : { *(.debug_sfnames) } \
|
|
|
|
/* DWARF 1.1 and DWARF 2 */ \
|
|
|
|
.debug_aranges 0 : { *(.debug_aranges) } \
|
|
|
|
.debug_pubnames 0 : { *(.debug_pubnames) } \
|
|
|
|
/* DWARF 2 */ \
|
|
|
|
.debug_info 0 : { *(.debug_info \
|
|
|
|
.gnu.linkonce.wi.*) } \
|
|
|
|
.debug_abbrev 0 : { *(.debug_abbrev) } \
|
|
|
|
.debug_line 0 : { *(.debug_line) } \
|
|
|
|
.debug_frame 0 : { *(.debug_frame) } \
|
|
|
|
.debug_str 0 : { *(.debug_str) } \
|
|
|
|
.debug_loc 0 : { *(.debug_loc) } \
|
|
|
|
.debug_macinfo 0 : { *(.debug_macinfo) } \
|
|
|
|
/* SGI/MIPS DWARF 2 extensions */ \
|
|
|
|
.debug_weaknames 0 : { *(.debug_weaknames) } \
|
|
|
|
.debug_funcnames 0 : { *(.debug_funcnames) } \
|
|
|
|
.debug_typenames 0 : { *(.debug_typenames) } \
|
|
|
|
.debug_varnames 0 : { *(.debug_varnames) } \
|
|
|
|
|
|
|
|
/* Stabs debugging sections. */
|
|
|
|
#define STABS_DEBUG \
|
|
|
|
.stab 0 : { *(.stab) } \
|
|
|
|
.stabstr 0 : { *(.stabstr) } \
|
|
|
|
.stab.excl 0 : { *(.stab.excl) } \
|
|
|
|
.stab.exclstr 0 : { *(.stab.exclstr) } \
|
|
|
|
.stab.index 0 : { *(.stab.index) } \
|
|
|
|
.stab.indexstr 0 : { *(.stab.indexstr) } \
|
|
|
|
.comment 0 : { *(.comment) }
|
|
|
|
|
|
|
|
#ifdef CONFIG_GENERIC_BUG
|
[PATCH] Generic BUG implementation
This patch adds common handling for kernel BUGs, for use by architectures as
they wish. The code is derived from arch/powerpc.
The advantages of having common BUG handling are:
- consistent BUG reporting across architectures
- shared implementation of out-of-line file/line data
- implement CONFIG_DEBUG_BUGVERBOSE consistently
This means that in inline impact of BUG is just the illegal instruction
itself, which is an improvement for i386 and x86-64.
A BUG is represented in the instruction stream as an illegal instruction,
which has file/line information associated with it. This extra information is
stored in the __bug_table section in the ELF file.
When the kernel gets an illegal instruction, it first confirms it might
possibly be from a BUG (ie, in kernel mode, the right illegal instruction).
It then calls report_bug(). This searches __bug_table for a matching
instruction pointer, and if found, prints the corresponding file/line
information. If report_bug() determines that it wasn't a BUG which caused the
trap, it returns BUG_TRAP_TYPE_NONE.
Some architectures (powerpc) implement WARN using the same mechanism; if the
illegal instruction was the result of a WARN, then report_bug(Q) returns
CONFIG_DEBUG_BUGVERBOSE; otherwise it returns BUG_TRAP_TYPE_BUG.
lib/bug.c keeps a list of loaded modules which can be searched for __bug_table
entries. The architecture must call
module_bug_finalize()/module_bug_cleanup() from its corresponding
module_finalize/cleanup functions.
Unsetting CONFIG_DEBUG_BUGVERBOSE will reduce the kernel size by some amount.
At the very least, filename and line information will not be recorded for each
but, but architectures may decide to store no extra information per BUG at
all.
Unfortunately, gcc doesn't have a general way to mark an asm() as noreturn, so
architectures will generally have to include an infinite loop (or similar) in
the BUG code, so that gcc knows execution won't continue beyond that point.
gcc does have a __builtin_trap() operator which may be useful to achieve the
same effect, unfortunately it cannot be used to actually implement the BUG
itself, because there's no way to get the instruction's address for use in
generating the __bug_table entry.
[randy.dunlap@oracle.com: Handle BUG=n, GENERIC_BUG=n to prevent build errors]
[bunk@stusta.de: include/linux/bug.h must always #include <linux/module.h]
Signed-off-by: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Andi Kleen <ak@muc.de>
Cc: Hugh Dickens <hugh@veritas.com>
Cc: Michael Ellerman <michael@ellerman.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
18 years ago
|
|
|
#define BUG_TABLE \
|
|
|
|
. = ALIGN(8); \
|
|
|
|
__bug_table : AT(ADDR(__bug_table) - LOAD_OFFSET) { \
|
|
|
|
VMLINUX_SYMBOL(__start___bug_table) = .; \
|
[PATCH] Generic BUG implementation
This patch adds common handling for kernel BUGs, for use by architectures as
they wish. The code is derived from arch/powerpc.
The advantages of having common BUG handling are:
- consistent BUG reporting across architectures
- shared implementation of out-of-line file/line data
- implement CONFIG_DEBUG_BUGVERBOSE consistently
This means that in inline impact of BUG is just the illegal instruction
itself, which is an improvement for i386 and x86-64.
A BUG is represented in the instruction stream as an illegal instruction,
which has file/line information associated with it. This extra information is
stored in the __bug_table section in the ELF file.
When the kernel gets an illegal instruction, it first confirms it might
possibly be from a BUG (ie, in kernel mode, the right illegal instruction).
It then calls report_bug(). This searches __bug_table for a matching
instruction pointer, and if found, prints the corresponding file/line
information. If report_bug() determines that it wasn't a BUG which caused the
trap, it returns BUG_TRAP_TYPE_NONE.
Some architectures (powerpc) implement WARN using the same mechanism; if the
illegal instruction was the result of a WARN, then report_bug(Q) returns
CONFIG_DEBUG_BUGVERBOSE; otherwise it returns BUG_TRAP_TYPE_BUG.
lib/bug.c keeps a list of loaded modules which can be searched for __bug_table
entries. The architecture must call
module_bug_finalize()/module_bug_cleanup() from its corresponding
module_finalize/cleanup functions.
Unsetting CONFIG_DEBUG_BUGVERBOSE will reduce the kernel size by some amount.
At the very least, filename and line information will not be recorded for each
but, but architectures may decide to store no extra information per BUG at
all.
Unfortunately, gcc doesn't have a general way to mark an asm() as noreturn, so
architectures will generally have to include an infinite loop (or similar) in
the BUG code, so that gcc knows execution won't continue beyond that point.
gcc does have a __builtin_trap() operator which may be useful to achieve the
same effect, unfortunately it cannot be used to actually implement the BUG
itself, because there's no way to get the instruction's address for use in
generating the __bug_table entry.
[randy.dunlap@oracle.com: Handle BUG=n, GENERIC_BUG=n to prevent build errors]
[bunk@stusta.de: include/linux/bug.h must always #include <linux/module.h]
Signed-off-by: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Andi Kleen <ak@muc.de>
Cc: Hugh Dickens <hugh@veritas.com>
Cc: Michael Ellerman <michael@ellerman.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
18 years ago
|
|
|
*(__bug_table) \
|
|
|
|
VMLINUX_SYMBOL(__stop___bug_table) = .; \
|
[PATCH] Generic BUG implementation
This patch adds common handling for kernel BUGs, for use by architectures as
they wish. The code is derived from arch/powerpc.
The advantages of having common BUG handling are:
- consistent BUG reporting across architectures
- shared implementation of out-of-line file/line data
- implement CONFIG_DEBUG_BUGVERBOSE consistently
This means that in inline impact of BUG is just the illegal instruction
itself, which is an improvement for i386 and x86-64.
A BUG is represented in the instruction stream as an illegal instruction,
which has file/line information associated with it. This extra information is
stored in the __bug_table section in the ELF file.
When the kernel gets an illegal instruction, it first confirms it might
possibly be from a BUG (ie, in kernel mode, the right illegal instruction).
It then calls report_bug(). This searches __bug_table for a matching
instruction pointer, and if found, prints the corresponding file/line
information. If report_bug() determines that it wasn't a BUG which caused the
trap, it returns BUG_TRAP_TYPE_NONE.
Some architectures (powerpc) implement WARN using the same mechanism; if the
illegal instruction was the result of a WARN, then report_bug(Q) returns
CONFIG_DEBUG_BUGVERBOSE; otherwise it returns BUG_TRAP_TYPE_BUG.
lib/bug.c keeps a list of loaded modules which can be searched for __bug_table
entries. The architecture must call
module_bug_finalize()/module_bug_cleanup() from its corresponding
module_finalize/cleanup functions.
Unsetting CONFIG_DEBUG_BUGVERBOSE will reduce the kernel size by some amount.
At the very least, filename and line information will not be recorded for each
but, but architectures may decide to store no extra information per BUG at
all.
Unfortunately, gcc doesn't have a general way to mark an asm() as noreturn, so
architectures will generally have to include an infinite loop (or similar) in
the BUG code, so that gcc knows execution won't continue beyond that point.
gcc does have a __builtin_trap() operator which may be useful to achieve the
same effect, unfortunately it cannot be used to actually implement the BUG
itself, because there's no way to get the instruction's address for use in
generating the __bug_table entry.
[randy.dunlap@oracle.com: Handle BUG=n, GENERIC_BUG=n to prevent build errors]
[bunk@stusta.de: include/linux/bug.h must always #include <linux/module.h]
Signed-off-by: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Andi Kleen <ak@muc.de>
Cc: Hugh Dickens <hugh@veritas.com>
Cc: Michael Ellerman <michael@ellerman.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
18 years ago
|
|
|
}
|
|
|
|
#else
|
|
|
|
#define BUG_TABLE
|
|
|
|
#endif
|
[PATCH] Generic BUG implementation
This patch adds common handling for kernel BUGs, for use by architectures as
they wish. The code is derived from arch/powerpc.
The advantages of having common BUG handling are:
- consistent BUG reporting across architectures
- shared implementation of out-of-line file/line data
- implement CONFIG_DEBUG_BUGVERBOSE consistently
This means that in inline impact of BUG is just the illegal instruction
itself, which is an improvement for i386 and x86-64.
A BUG is represented in the instruction stream as an illegal instruction,
which has file/line information associated with it. This extra information is
stored in the __bug_table section in the ELF file.
When the kernel gets an illegal instruction, it first confirms it might
possibly be from a BUG (ie, in kernel mode, the right illegal instruction).
It then calls report_bug(). This searches __bug_table for a matching
instruction pointer, and if found, prints the corresponding file/line
information. If report_bug() determines that it wasn't a BUG which caused the
trap, it returns BUG_TRAP_TYPE_NONE.
Some architectures (powerpc) implement WARN using the same mechanism; if the
illegal instruction was the result of a WARN, then report_bug(Q) returns
CONFIG_DEBUG_BUGVERBOSE; otherwise it returns BUG_TRAP_TYPE_BUG.
lib/bug.c keeps a list of loaded modules which can be searched for __bug_table
entries. The architecture must call
module_bug_finalize()/module_bug_cleanup() from its corresponding
module_finalize/cleanup functions.
Unsetting CONFIG_DEBUG_BUGVERBOSE will reduce the kernel size by some amount.
At the very least, filename and line information will not be recorded for each
but, but architectures may decide to store no extra information per BUG at
all.
Unfortunately, gcc doesn't have a general way to mark an asm() as noreturn, so
architectures will generally have to include an infinite loop (or similar) in
the BUG code, so that gcc knows execution won't continue beyond that point.
gcc does have a __builtin_trap() operator which may be useful to achieve the
same effect, unfortunately it cannot be used to actually implement the BUG
itself, because there's no way to get the instruction's address for use in
generating the __bug_table entry.
[randy.dunlap@oracle.com: Handle BUG=n, GENERIC_BUG=n to prevent build errors]
[bunk@stusta.de: include/linux/bug.h must always #include <linux/module.h]
Signed-off-by: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Andi Kleen <ak@muc.de>
Cc: Hugh Dickens <hugh@veritas.com>
Cc: Michael Ellerman <michael@ellerman.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
18 years ago
|
|
|
|
|
|
|
#ifdef CONFIG_PM_TRACE
|
|
|
|
#define TRACEDATA \
|
|
|
|
. = ALIGN(4); \
|
|
|
|
.tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) { \
|
|
|
|
VMLINUX_SYMBOL(__tracedata_start) = .; \
|
|
|
|
*(.tracedata) \
|
|
|
|
VMLINUX_SYMBOL(__tracedata_end) = .; \
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
#define TRACEDATA
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#define NOTES \
|
|
|
|
.notes : AT(ADDR(.notes) - LOAD_OFFSET) { \
|
|
|
|
VMLINUX_SYMBOL(__start_notes) = .; \
|
|
|
|
*(.note.*) \
|
|
|
|
VMLINUX_SYMBOL(__stop_notes) = .; \
|
|
|
|
}
|
|
|
|
|
|
|
|
#define INITCALLS \
|
|
|
|
*(.initcallearly.init) \
|
|
|
|
VMLINUX_SYMBOL(__early_initcall_end) = .; \
|
|
|
|
*(.initcall0.init) \
|
|
|
|
*(.initcall0s.init) \
|
|
|
|
*(.initcall1.init) \
|
|
|
|
*(.initcall1s.init) \
|
|
|
|
*(.initcall2.init) \
|
|
|
|
*(.initcall2s.init) \
|
|
|
|
*(.initcall3.init) \
|
|
|
|
*(.initcall3s.init) \
|
|
|
|
*(.initcall4.init) \
|
|
|
|
*(.initcall4s.init) \
|
|
|
|
*(.initcall5.init) \
|
|
|
|
*(.initcall5s.init) \
|
|
|
|
*(.initcallrootfs.init) \
|
|
|
|
*(.initcall6.init) \
|
|
|
|
*(.initcall6s.init) \
|
|
|
|
*(.initcall7.init) \
|
|
|
|
*(.initcall7s.init)
|
|
|
|
|
|
|
|
#define PERCPU(align) \
|
|
|
|
. = ALIGN(align); \
|
|
|
|
VMLINUX_SYMBOL(__per_cpu_start) = .; \
|
|
|
|
.data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { \
|
|
|
|
*(.data.percpu.page_aligned) \
|
|
|
|
*(.data.percpu) \
|
|
|
|
*(.data.percpu.shared_aligned) \
|
|
|
|
} \
|
|
|
|
VMLINUX_SYMBOL(__per_cpu_end) = .;
|