Skip to content

Commit

Permalink
Merge pull request #41685 from JuliaLang/vc/gc_probes
Browse files Browse the repository at this point in the history
Add support for USDT probes
  • Loading branch information
vchuravy authored Jul 29, 2021
2 parents d066a8e + 66b40ad commit fa30e64
Show file tree
Hide file tree
Showing 11 changed files with 357 additions and 1 deletion.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
*.so
*.dylib
*.dSYM
*.h.gen
*.jl.cov
*.jl.*.cov
*.jl.mem
Expand Down
13 changes: 13 additions & 0 deletions Make.inc
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ HAVE_SSP := 0
WITH_GC_VERIFY := 0
WITH_GC_DEBUG_ENV := 0

# Enable DTrace support
WITH_DTRACE := 0

# Prevent picking up $ARCH from the environment variables
ARCH:=

Expand Down Expand Up @@ -759,6 +762,13 @@ JCXXFLAGS += -DGC_DEBUG_ENV
JCFLAGS += -DGC_DEBUG_ENV
endif

ifeq ($(WITH_DTRACE), 1)
JCXXFLAGS += -DUSE_DTRACE
JCFLAGS += -DUSE_DTRACE
DTRACE := dtrace
else
endif

# ===========================================================================

# Select the cpu architecture to target, or automatically detects the user's compiler
Expand Down Expand Up @@ -1555,6 +1565,7 @@ LINKCOLOR:="\033[34;1m"
PERLCOLOR:="\033[35m"
FLISPCOLOR:="\033[32m"
JULIACOLOR:="\033[32;1m"
DTRACECOLOR:="\033[32;1m"

SRCCOLOR:="\033[33m"
BINCOLOR:="\033[37;1m"
Expand All @@ -1568,6 +1579,7 @@ PRINT_LINK = printf ' %b %b\n' $(LINKCOLOR)LINK$(ENDCOLOR) $(BINCOLOR)$(GOAL)
PRINT_PERL = printf ' %b %b\n' $(PERLCOLOR)PERL$(ENDCOLOR) $(BINCOLOR)$(GOAL)$(ENDCOLOR); $(1)
PRINT_FLISP = printf ' %b %b\n' $(FLISPCOLOR)FLISP$(ENDCOLOR) $(BINCOLOR)$(GOAL)$(ENDCOLOR); $(1)
PRINT_JULIA = printf ' %b %b\n' $(JULIACOLOR)JULIA$(ENDCOLOR) $(BINCOLOR)$(GOAL)$(ENDCOLOR); $(1)
PRINT_DTRACE = printf ' %b %b\n' $(DTRACECOLOR)DTRACE$(ENDCOLOR) $(BINCOLOR)$(GOAL)$(ENDCOLOR); $(1)
else
QUIET_MAKE =
Expand All @@ -1577,6 +1589,7 @@ PRINT_LINK = echo '$(subst ','\'',$(1))'; $(1)
PRINT_PERL = echo '$(subst ','\'',$(1))'; $(1)
PRINT_FLISP = echo '$(subst ','\'',$(1))'; $(1)
PRINT_JULIA = echo '$(subst ','\'',$(1))'; $(1)
PRINT_DTRACE = echo '$(subst ','\'',$(1))'; $(1)

endif

Expand Down
44 changes: 44 additions & 0 deletions contrib/bpftrace/gc_all.bt
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/usr/bin/env bpftrace

BEGIN
{
printf("Tracing Julia GC Times... Hit Ctrl-C to end.\n");
}

usdt:usr/lib/libjulia-internal.so:julia:gc__begin
{
$now = nsecs;
@time[pid] = $now;
@start[pid] = $now;
}

usdt:usr/lib/libjulia-internal.so:julia:gc__stop_the_world
/@start[pid]/
{
$now = nsecs;
@stop_the_world_usecs[pid] = hist(($now - @time[pid]) / 1000);
@time[pid] = $now;
}

usdt:usr/lib/libjulia-internal.so:julia:gc__end
/@start[pid]/
{
$now = nsecs;
@gc_total_usecs[pid] = hist(($now - @start[pid]) / 1000);
@gc_phase_usecs[pid] = hist(($now - @time[pid]) / 1000);
@time[pid] = $now;
delete(@start[pid]);
}

usdt:usr/lib/libjulia-internal.so:julia:gc__finalizer
/@time[pid]/
{
@finalizer[pid] = hist((nsecs - @time[pid]) / 1000);
delete(@time[pid]);
}

END
{
clear(@start);
clear(@time);
}
23 changes: 23 additions & 0 deletions contrib/bpftrace/gc_simple.bt
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/usr/bin/env bpftrace

BEGIN
{
printf("Tracing Julia GC Times... Hit Ctrl-C to end.\n");
}

usdt:usr/lib/libjulia-internal.so:julia:gc__begin
{
@start[pid] = nsecs;
}

usdt:usr/lib/libjulia-internal.so:julia:gc__end
/@start[pid]/
{
@usecs[pid] = hist((nsecs - @start[pid]) / 1000);
delete(@start[pid]);
}

END
{
clear(@start);
}
23 changes: 23 additions & 0 deletions contrib/bpftrace/gc_stop_the_world_latency.bt
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/usr/bin/env bpftrace

BEGIN
{
printf("Tracing Julia GC Stop-The-World Latency... Hit Ctrl-C to end.\n");
}

usdt:usr/lib/libjulia-internal.so:julia:gc__begin
{
@start[pid] = nsecs;
}

usdt:usr/lib/libjulia-internal.so:julia:gc__stop_the_world
/@start[pid]/
{
@usecs[pid] = hist((nsecs - @start[pid]) / 1000);
delete(@start[pid]);
}

END
{
clear(@start);
}
1 change: 1 addition & 0 deletions doc/make.jl
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ DevDocs = [
"devdocs/debuggingtips.md",
"devdocs/valgrind.md",
"devdocs/sanitizers.md",
"devdocs/probes.md"
]
]

Expand Down
168 changes: 168 additions & 0 deletions doc/src/devdocs/probes.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
# Instrumenting Julia with DTrace, and bpftrace

DTrace and bpftrace are tools that enable lightweight instrumentation of processes.
You can turn the instrumentation on and off while the process is running,
and with instrumentation off the overhead is minimal.

!!! compat "Julia 1.8"
Support for probes was added in Julia 1.8

!!! note
This documentation has been written from a Linux perspective, most of this
should hold on Mac OS/Darwin and FreeBSD.

## Enabling support

On Linux install the `systemtap` package that has a version of `dtrace`.

```
WITH_DTRACE=1
```

### Verifying

```
> readelf -n usr/lib/libjulia-internal.so.1
Displaying notes found in: .note.gnu.build-id
Owner Data size Description
GNU 0x00000014 NT_GNU_BUILD_ID (unique build ID bitstring)
Build ID: 57161002f35548772a87418d2385c284ceb3ead8
Displaying notes found in: .note.stapsdt
Owner Data size Description
stapsdt 0x00000029 NT_STAPSDT (SystemTap probe descriptors)
Provider: julia
Name: gc__begin
Location: 0x000000000013213e, Base: 0x00000000002bb4da, Semaphore: 0x0000000000346cac
Arguments:
stapsdt 0x00000032 NT_STAPSDT (SystemTap probe descriptors)
Provider: julia
Name: gc__stop_the_world
Location: 0x0000000000132144, Base: 0x00000000002bb4da, Semaphore: 0x0000000000346cae
Arguments:
stapsdt 0x00000027 NT_STAPSDT (SystemTap probe descriptors)
Provider: julia
Name: gc__end
Location: 0x000000000013214a, Base: 0x00000000002bb4da, Semaphore: 0x0000000000346cb0
Arguments:
stapsdt 0x0000002d NT_STAPSDT (SystemTap probe descriptors)
Provider: julia
Name: gc__finalizer
Location: 0x0000000000132150, Base: 0x00000000002bb4da, Semaphore: 0x0000000000346cb2
Arguments:
```

## Adding probes in libjulia

Probes are declared in dtraces format in the file `src/uprobes.d`. The generated
header file is included in `src/julia_internal.h` and if you add probes you should
provide a noop implementation there.

The header will contain a semaphore `*_ENABLED` and the actual call to the probe.
If the probe arguments are expensive to compute you should first check if the
probe is enabled and then compute the arguments and call the probe.

```c
if (JL_PROBE_{PROBE}_ENABLED())
auto expensive_arg = ...;
JL_PROBE_{PROBE}(expensive_arg);
```
If your probe has no arguments it is preferred to not include the semaphore check.
With USDT probes enabled the cost of a semaphore is a memory load, irrespective of
the fact that the probe is enabled or not.
```c
#define JL_PROBE_GC_BEGIN_ENABLED() __builtin_expect (julia_gc__begin_semaphore, 0)
__extension__ extern unsigned short julia_gc__begin_semaphore __attribute__ ((unused)) __attribute__ ((section (".probes")));
```

Whereas the probe itself is a noop sled that will be patched to a trampoline to
the probe handler.

## Available probes

### GC probes

1. `julia:gc__begin`: GC begins running on one thread and triggers stop-the-world.
2. `julia:gc__stop_the_world`: All threads have reached a safepoint and GC runs.
3. `julia:gc__mark__begin`: Beginning the mark phase
4. `julia:gc__mark_end(scanned_bytes, perm_scanned)`: Mark phase ended
5. `julia:gc__sweep_begin(full)`: Starting sweep
6. `julia:gc__sweep_end()`: Sweep phase finished
7. `julia:gc__end`: GC is finished, other threads continue work
8. `julia:gc__finalizer`: Initial GC thread has finished running finalizers

#### GC stop-the-world latency

An example `bpftrace` script is given in `contrib/gc_stop_the_world_latency.bt`
and it creates a histogram of the latency for all threads to reach a safepoint.

Running this Julia code, with `julia -t 2`

```
using Base.Threads
fib(x) = x <= 1 ? 1 : fib(x-1) + fib(x-2)
beaver = @spawn begin
while true
fib(30)
# This safepoint is necessary until #41616, since otherwise this
# loop will never yield to GC.
GC.safepoint()
end
end
allocator = @spawn begin
while true
zeros(1024)
end
end
wait(allocator)
```

and in a second terminal

```
> sudo contrib/bpftrace/gc_stop_the_world_latency.bt
Attaching 4 probes...
Tracing Julia GC Stop-The-World Latency... Hit Ctrl-C to end.
^C
@usecs[1743412]:
[4, 8) 971 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[8, 16) 837 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
[16, 32) 129 |@@@@@@ |
[32, 64) 10 | |
[64, 128) 1 | |
```

We can see the latency distribution of the stop-the-world phase in the executed Julia process.

## Notes on using `bpftrace`

An example probe in the bpftrace format looks like:

```
usdt:usr/lib/libjulia-internal.so:julia:gc__begin
{
@start[pid] = nsecs;
}
```

The probe declaration takes the kind `usdt`, then either the
path to the library or the PID, the provider name `julia`
and the probe name `gc__begin`. Note that I am using a
relative path to the `libjulia-internal.so`, but this might
need to be an absolute path on a production system.

## Useful references:

- [Julia Evans blog on Linux tracing systems](https://jvns.ca/blog/2017/07/05/linux-tracing-systems)
- [LWN article on USDT and BPF](https://lwn.net/Articles/753601/)
- [GDB support for probes](https://sourceware.org/gdb/onlinedocs/gdb/Static-Probe-Points.html)
- [Brendan Gregg -- Linux Performance](https://www.brendangregg.com/linuxperf.html)
21 changes: 20 additions & 1 deletion src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,18 @@ endif

SRCS += $(RUNTIME_SRCS)

ifeq ($(WITH_DTRACE),1)
DTRACE_HEADERS := uprobes.h.gen
ifneq ($(OS),Darwin)
SRCS += uprobes
endif
else
DTRACE_HEADERS :=
endif

# headers are used for dependency tracking, while public headers will be part of the dist
UV_HEADERS :=
HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,julia.h julia_assert.h julia_threads.h julia_fasttls.h locks.h atomics.h julia_internal.h options.h timing.h)
HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,julia.h julia_assert.h julia_threads.h julia_fasttls.h locks.h atomics.h julia_internal.h options.h timing.h) $(addprefix $(BUILDDIR)/, $(DTRACE_HEADERS))
PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,julia.h julia_assert.h julia_threads.h julia_fasttls.h locks.h atomics.h julia_gcext.h)
ifeq ($(USE_SYSTEM_LIBUV),0)
UV_HEADERS += uv.h
Expand Down Expand Up @@ -163,6 +171,13 @@ $(BUILDDIR):

LLVM_CONFIG_ABSOLUTE := $(shell which $(LLVM_CONFIG))

# Generate the DTrace header file, while also renaming the macros from
# JULIA_ to JL_PROBE to clearly delinate them.
$(BUILDDIR)/%.h.gen : $(SRCDIR)/%.d
@$(call PRINT_DTRACE, $(DTRACE) -h -s $< -o $@)
sed 's/JULIA_/JL_PROBE_/' $@ > $@.tmp
mv $@.tmp $@

# source file rules
$(BUILDDIR)/%.o: $(SRCDIR)/%.c $(HEADERS) | $(BUILDDIR)
@$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(JCFLAGS) $(SHIPFLAGS) $(DISABLE_ASSERTIONS) -c $< -o $@)
Expand All @@ -172,6 +187,10 @@ $(BUILDDIR)/%.o: $(SRCDIR)/%.cpp $(SRCDIR)/llvm-version.h $(HEADERS) $(LLVM_CONF
@$(call PRINT_CC, $(CXX) $(LLVM_CXXFLAGS) $(JCPPFLAGS) $(JCXXFLAGS) $(SHIPFLAGS) $(CXX_DISABLE_ASSERTION) -c $< -o $@)
$(BUILDDIR)/%.dbg.obj: $(SRCDIR)/%.cpp $(SRCDIR)/llvm-version.h $(HEADERS) $(LLVM_CONFIG_ABSOLUTE) | $(BUILDDIR)
@$(call PRINT_CC, $(CXX) $(LLVM_CXXFLAGS) $(JCPPFLAGS) $(JCXXFLAGS) $(DEBUGFLAGS) -c $< -o $@)
$(BUILDDIR)/%.o : $(SRCDIR)/%.d
@$(call PRINT_DTRACE, $(DTRACE) -G -s $< -o $@)
$(BUILDDIR)/%.dbg.obj : $(SRCDIR)/%.d
@$(call PRINT_DTRACE, $(DTRACE) -G -s $< -o $@)

# public header rules
$(eval $(call dir_target,$(build_includedir)/julia))
Expand Down
Loading

0 comments on commit fa30e64

Please sign in to comment.