Merge pull request #7010 from libgit2/ethomson/index-bench

Add profiling data to benchmarks
This commit is contained in:
Edward Thomson
2025-01-13 23:19:48 +00:00
committed by GitHub
43 changed files with 54043 additions and 92 deletions

View File

@@ -9,6 +9,9 @@ on:
debug:
type: boolean
description: Debugging output
deploy:
type: boolean
description: Deploy the benchmark site
schedule:
- cron: '15 4 * * *'
@@ -34,16 +37,16 @@ jobs:
setup-script: ubuntu
env:
CC: clang
CMAKE_OPTIONS: -DUSE_HTTPS=OpenSSL -DREGEX_BACKEND=builtin -DDEPRECATE_HARD=ON -DUSE_GSSAPI=ON -DBUILD_TESTS=OFF -DBUILD_EXAMPLES=OFF -DBUILD_CLI=ON -DCMAKE_BUILD_TYPE=Release
CMAKE_BUILD_OPTIONS: --config Release
CMAKE_OPTIONS: -DUSE_HTTPS=OpenSSL -DREGEX_BACKEND=builtin -DDEPRECATE_HARD=ON -DUSE_GSSAPI=ON -DBUILD_TESTS=OFF -DBUILD_EXAMPLES=OFF -DBUILD_CLI=ON -DCMAKE_BUILD_TYPE=RelWithDebInfo
CMAKE_BUILD_OPTIONS: --config RelWithDebInfo
- name: "macOS"
id: macos
os: macos-latest
setup-script: osx
env:
CC: clang
CMAKE_OPTIONS: -DREGEX_BACKEND=regcomp_l -DDEPRECATE_HARD=ON -DUSE_GSSAPI=ON -DBUILD_TESTS=OFF -DBUILD_EXAMPLES=OFF -DBUILD_CLI=ON -DCMAKE_BUILD_TYPE=Release
CMAKE_BUILD_OPTIONS: --config Release
CMAKE_OPTIONS: -DREGEX_BACKEND=regcomp_l -DDEPRECATE_HARD=ON -DUSE_GSSAPI=ON -DBUILD_TESTS=OFF -DBUILD_EXAMPLES=OFF -DBUILD_CLI=ON -DCMAKE_BUILD_TYPE=RelWithDebInfo
CMAKE_BUILD_OPTIONS: --config RelWithDebInfo
PKG_CONFIG_PATH: /usr/local/opt/openssl/lib/pkgconfig
- name: "Windows (amd64, Visual Studio)"
id: windows
@@ -52,8 +55,8 @@ jobs:
env:
ARCH: amd64
CMAKE_GENERATOR: Visual Studio 17 2022
CMAKE_OPTIONS: -A x64 -DDEPRECATE_HARD=ON -DBUILD_TESTS=OFF -DBUILD_EXAMPLES=OFF -DBUILD_CLI=ON -DCMAKE_BUILD_TYPE=Release
CMAKE_BUILD_OPTIONS: --config Release
CMAKE_OPTIONS: -A x64 -DDEPRECATE_HARD=ON -DBUILD_TESTS=OFF -DBUILD_EXAMPLES=OFF -DBUILD_CLI=ON -DCMAKE_BUILD_TYPE=RelWithDebInfo
CMAKE_BUILD_OPTIONS: --config RelWithDebInfo
fail-fast: false
name: "Benchmark ${{ matrix.platform.name }}"
env: ${{ matrix.platform.env }}
@@ -70,9 +73,18 @@ jobs:
if: matrix.platform.setup-script != ''
- name: Clone resource repositories
run: |
mkdir resources
# TODO:
# we need a superior way to package the benchmark resources; lfs
# is too expensive
# git lfs install
# git clone https://github.com/libgit2/benchmark-resources resources
git clone --bare https://github.com/git/git resources/git
git clone --bare https://github.com/torvalds/linux resources/linux
# TODO:
# avoid linux temporarily; the linux blame benchmarks are simply
# too slow to use
# git clone --bare https://github.com/torvalds/linux resources/linux
- name: Build
run: |
mkdir build && cd build
@@ -80,13 +92,16 @@ jobs:
shell: bash
- name: Benchmark
run: |
export BENCHMARK_GIT_REPOSITORY="$(pwd)/resources/git"
# TODO:
# avoid benchmark resource path currently
#export BENCHMARK_RESOURCES_PATH="$(pwd)/resources"
export BENCHMARK_GIT_PATH="$(pwd)/resources/git"
# avoid linux temporarily; the linux blame benchmarks are simply
# too slow to use
# export BENCHMARK_LINUX_REPOSITORY="$(pwd)/resources/linux"
# export BENCHMARK_LINUX_PATH="$(pwd)/resources/linux"
if [[ "$(uname -s)" == MINGW* ]]; then
GIT2_CLI="$(cygpath -w $(pwd))\\build\\Release\\git2"
GIT2_CLI="$(cygpath -w $(pwd))\\build\\RelWithDebInfo\\git2"
else
GIT2_CLI="$(pwd)/build/git2"
fi
@@ -103,7 +118,7 @@ jobs:
../source/tests/benchmarks/benchmark.sh \
${SUITE_FLAG} ${DEBUG_FLAG} \
--baseline-cli "git" --cli "${GIT2_CLI}" --name libgit2 \
--json benchmarks.json --zip benchmarks.zip
--json benchmarks.json --flamegraph --zip benchmarks.zip
shell: bash
- name: Upload results
uses: actions/upload-artifact@v4
@@ -116,7 +131,7 @@ jobs:
publish:
name: Publish results
needs: [ build ]
if: always() && github.repository == 'libgit2/libgit2' && github.event_name == 'schedule'
if: always() && github.repository == 'libgit2/libgit2'
runs-on: ubuntu-latest
steps:
- name: Check out benchmark repository
@@ -128,40 +143,58 @@ jobs:
ssh-key: ${{ secrets.BENCHMARKS_PUBLISH_KEY }}
- name: Download test results
uses: actions/download-artifact@v4
- name: Generate API
run: |
# Move today's benchmark run into the right place
for platform in linux macos windows; do
TIMESTAMP=$(jq .time.start < "benchmark-${platform}/benchmarks.json")
TIMESTAMP_LEN=$(echo -n ${TIMESTAMP} | wc -c | xargs)
DENOMINATOR=1
if [ "${TIMESTAMP_LEN}" = "19" ]; then
DENOMINATOR="1000000000"
elif [ "${TIMESTAMP_LEN}" = "13" ]; then
DENOMINATOR="1000"
else
echo "unknown timestamp"
exit 1
fi
if [[ "$(uname -s)" == "Darwin" ]]; then
DATE=$(date -R -r $(("${TIMESTAMP}/${DENOMINATOR}")) +"%Y-%m-%d")
else
DATE=$(date -d @$(("${TIMESTAMP}/${DENOMINATOR}")) +"%Y-%m-%d")
fi
# move the complete results in
mkdir -p "site/public/api/runs/${DATE}"
cp "benchmark-${platform}/benchmarks.json" "site/public/api/runs/${DATE}/${platform}.json"
# unzip the individual results
PLATFORM_TEMP=$(mktemp -d)
unzip "benchmark-${platform}/benchmarks.zip" -d "${PLATFORM_TEMP}"
mkdir -p "site/public/api/runs/${DATE}/${platform}"
find "${PLATFORM_TEMP}" -name \*\.svg -exec cp {} "site/public/api/runs/${DATE}/${platform}" \;
done
(cd site && node scripts/aggregate.js)
shell: bash
# in debug mode, don't deploy the site; only create a zip file and
# upload it for debugging
- name: Upload site
uses: actions/upload-artifact@v4
with:
name: site
path: site
if: github.event_name == 'workflow_dispatch'
- name: Publish API
run: |
# Move today's benchmark run into the right place
for platform in linux macos windows; do
TIMESTAMP=$(jq .time.start < "benchmark-${platform}/benchmarks.json")
TIMESTAMP_LEN=$(echo -n ${TIMESTAMP} | wc -c | xargs)
DENOMINATOR=1
if [ "${TIMESTAMP_LEN}" = "19" ]; then
DENOMINATOR="1000000000"
elif [ "${TIMESTAMP_LEN}" = "13" ]; then
DENOMINATOR="1000"
else
echo "unknown timestamp"
exit 1
fi
if [[ "$(uname -s)" == "Darwin" ]]; then
DATE=$(date -R -r $(("${TIMESTAMP}/${DENOMINATOR}")) +"%Y-%m-%d")
else
DATE=$(date -d @$(("${TIMESTAMP}/${DENOMINATOR}")) +"%Y-%m-%d")
fi
mkdir -p "site/public/api/runs/${DATE}"
cp "benchmark-${platform}/benchmarks.json" "site/public/api/runs/${DATE}/${platform}.json"
done
(cd site && node scripts/aggregate.js)
(
cd site &&
git config user.name 'Benchmark Site Generation' &&
git config user.email 'libgit2@users.noreply.github.com' &&
git add . &&
git commit --allow-empty -m"benchmark update ${DATE}" &&
git push origin main
)
shell: bash
working-directory: site
if: github.event_name == 'schedule' || github.event.inputs.deploy == 'true'

View File

@@ -18,3 +18,6 @@ sudo apt-get install -y --no-install-recommends \
wget https://github.com/sharkdp/hyperfine/releases/download/v1.12.0/hyperfine_1.12.0_amd64.deb
sudo dpkg -i hyperfine_1.12.0_amd64.deb
echo -n "Setting performance events availability to: "
echo -1 | sudo tee /proc/sys/kernel/perf_event_paranoid

View File

@@ -0,0 +1,226 @@
# Flame Graphs visualize profiled code
Main Website: http://www.brendangregg.com/flamegraphs.html
Example (click to zoom):
[![Example](http://www.brendangregg.com/FlameGraphs/cpu-bash-flamegraph.svg)](http://www.brendangregg.com/FlameGraphs/cpu-bash-flamegraph.svg)
Click a box to zoom the Flame Graph to this stack frame only.
To search and highlight all stack frames matching a regular expression, click the _search_ button in the upper right corner or press Ctrl-F.
By default, search is case sensitive, but this can be toggled by pressing Ctrl-I or by clicking the _ic_ button in the upper right corner.
Other sites:
- The Flame Graph article in ACMQ and CACM: http://queue.acm.org/detail.cfm?id=2927301 http://cacm.acm.org/magazines/2016/6/202665-the-flame-graph/abstract
- CPU profiling using Linux perf\_events, DTrace, SystemTap, or ktap: http://www.brendangregg.com/FlameGraphs/cpuflamegraphs.html
- CPU profiling using XCode Instruments: http://schani.wordpress.com/2012/11/16/flame-graphs-for-instruments/
- CPU profiling using Xperf.exe: http://randomascii.wordpress.com/2013/03/26/summarizing-xperf-cpu-usage-with-flame-graphs/
- Memory profiling: http://www.brendangregg.com/FlameGraphs/memoryflamegraphs.html
- Other examples, updates, and news: http://www.brendangregg.com/flamegraphs.html#Updates
Flame graphs can be created in three steps:
1. Capture stacks
2. Fold stacks
3. flamegraph.pl
1\. Capture stacks
=================
Stack samples can be captured using Linux perf\_events, FreeBSD pmcstat (hwpmc), DTrace, SystemTap, and many other profilers. See the stackcollapse-\* converters.
### Linux perf\_events
Using Linux perf\_events (aka "perf") to capture 60 seconds of 99 Hertz stack samples, both user- and kernel-level stacks, all processes:
```
# perf record -F 99 -a -g -- sleep 60
# perf script > out.perf
```
Now only capturing PID 181:
```
# perf record -F 99 -p 181 -g -- sleep 60
# perf script > out.perf
```
### DTrace
Using DTrace to capture 60 seconds of kernel stacks at 997 Hertz:
```
# dtrace -x stackframes=100 -n 'profile-997 /arg0/ { @[stack()] = count(); } tick-60s { exit(0); }' -o out.kern_stacks
```
Using DTrace to capture 60 seconds of user-level stacks for PID 12345 at 97 Hertz:
```
# dtrace -x ustackframes=100 -n 'profile-97 /pid == 12345 && arg1/ { @[ustack()] = count(); } tick-60s { exit(0); }' -o out.user_stacks
```
60 seconds of user-level stacks, including time spent in-kernel, for PID 12345 at 97 Hertz:
```
# dtrace -x ustackframes=100 -n 'profile-97 /pid == 12345/ { @[ustack()] = count(); } tick-60s { exit(0); }' -o out.user_stacks
```
Switch `ustack()` for `jstack()` if the application has a ustack helper to include translated frames (eg, node.js frames; see: http://dtrace.org/blogs/dap/2012/01/05/where-does-your-node-program-spend-its-time/). The rate for user-level stack collection is deliberately slower than kernel, which is especially important when using `jstack()` as it performs additional work to translate frames.
2\. Fold stacks
==============
Use the stackcollapse programs to fold stack samples into single lines. The programs provided are:
- `stackcollapse.pl`: for DTrace stacks
- `stackcollapse-perf.pl`: for Linux perf_events "perf script" output
- `stackcollapse-pmc.pl`: for FreeBSD pmcstat -G stacks
- `stackcollapse-stap.pl`: for SystemTap stacks
- `stackcollapse-instruments.pl`: for XCode Instruments
- `stackcollapse-vtune.pl`: for Intel VTune profiles
- `stackcollapse-ljp.awk`: for Lightweight Java Profiler
- `stackcollapse-jstack.pl`: for Java jstack(1) output
- `stackcollapse-gdb.pl`: for gdb(1) stacks
- `stackcollapse-go.pl`: for Golang pprof stacks
- `stackcollapse-vsprof.pl`: for Microsoft Visual Studio profiles
- `stackcollapse-wcp.pl`: for wallClockProfiler output
Usage example:
```
For perf_events:
$ ./stackcollapse-perf.pl out.perf > out.folded
For DTrace:
$ ./stackcollapse.pl out.kern_stacks > out.kern_folded
```
The output looks like this:
```
unix`_sys_sysenter_post_swapgs 1401
unix`_sys_sysenter_post_swapgs;genunix`close 5
unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf 85
unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;c2audit`audit_closef 26
unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;c2audit`audit_setf 5
unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;genunix`audit_getstate 6
unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;genunix`audit_unfalloc 2
unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;genunix`closef 48
[...]
```
3\. flamegraph.pl
================
Use flamegraph.pl to render an SVG.
```
$ ./flamegraph.pl out.kern_folded > kernel.svg
```
An advantage of having the folded input file (and why this is separate to flamegraph.pl) is that you can use grep for functions of interest. Eg:
```
$ grep cpuid out.kern_folded | ./flamegraph.pl > cpuid.svg
```
Provided Examples
=================
### Linux perf\_events
An example output from Linux "perf script" is included, gzip'd, as example-perf-stacks.txt.gz. The resulting flame graph is example-perf.svg:
[![Example](http://www.brendangregg.com/FlameGraphs/example-perf.svg)](http://www.brendangregg.com/FlameGraphs/example-perf.svg)
You can create this using:
```
$ gunzip -c example-perf-stacks.txt.gz | ./stackcollapse-perf.pl --all | ./flamegraph.pl --color=java --hash > example-perf.svg
```
This shows my typical workflow: I'll gzip profiles on the target, then copy them to my laptop for analysis. Since I have hundreds of profiles, I leave them gzip'd!
Since this profile included Java, I used the flamegraph.pl --color=java palette. I've also used stackcollapse-perf.pl --all, which includes all annotations that help flamegraph.pl use separate colors for kernel and user level code. The resulting flame graph uses: green == Java, yellow == C++, red == user-mode native, orange == kernel.
This profile was from an analysis of vert.x performance. The benchmark client, wrk, is also visible in the flame graph.
### DTrace
An example output from DTrace is also included, example-dtrace-stacks.txt, and the resulting flame graph, example-dtrace.svg:
[![Example](http://www.brendangregg.com/FlameGraphs/example-dtrace.svg)](http://www.brendangregg.com/FlameGraphs/example-dtrace.svg)
You can generate this using:
```
$ ./stackcollapse.pl example-dtrace-stacks.txt | ./flamegraph.pl > example-dtrace.svg
```
This was from a particular performance investigation: the Flame Graph identified that CPU time was spent in the lofs module, and quantified that time.
Options
=======
See the USAGE message (--help) for options:
USAGE: ./flamegraph.pl [options] infile > outfile.svg
--title TEXT # change title text
--subtitle TEXT # second level title (optional)
--width NUM # width of image (default 1200)
--height NUM # height of each frame (default 16)
--minwidth NUM # omit smaller functions. In pixels or use "%" for
# percentage of time (default 0.1 pixels)
--fonttype FONT # font type (default "Verdana")
--fontsize NUM # font size (default 12)
--countname TEXT # count type label (default "samples")
--nametype TEXT # name type label (default "Function:")
--colors PALETTE # set color palette. choices are: hot (default), mem,
# io, wakeup, chain, java, js, perl, red, green, blue,
# aqua, yellow, purple, orange
--bgcolors COLOR # set background colors. gradient choices are yellow
# (default), blue, green, grey; flat colors use "#rrggbb"
--hash # colors are keyed by function name hash
--cp # use consistent palette (palette.map)
--reverse # generate stack-reversed flame graph
--inverted # icicle graph
--flamechart # produce a flame chart (sort by time, do not merge stacks)
--negate # switch differential hues (blue<->red)
--notes TEXT # add notes comment in SVG (for debugging)
--help # this message
eg,
./flamegraph.pl --title="Flame Graph: malloc()" trace.txt > graph.svg
As suggested in the example, flame graphs can process traces of any event,
such as malloc()s, provided stack traces are gathered.
Consistent Palette
==================
If you use the `--cp` option, it will use the $colors selection and randomly
generate the palette like normal. Any future flamegraphs created using the `--cp`
option will use the same palette map. Any new symbols from future flamegraphs
will have their colors randomly generated using the $colors selection.
If you don't like the palette, just delete the palette.map file.
This allows you to change your color scheme between flamegraphs to make the
differences REALLY stand out.
Example:
Say we have 2 captures, one with a problem, and one when it was working
(whatever "it" is):
```
cat working.folded | ./flamegraph.pl --cp > working.svg
# this generates a palette.map, as per the normal random generated look.
cat broken.folded | ./flamegraph.pl --cp --colors mem > broken.svg
# this svg will use the same palette.map for the same events, but a very
# different colorscheme for any new events.
```
Take a look at the demo directory for an example:
palette-example-working.svg
palette-example-broken.svg

View File

@@ -0,0 +1,31 @@
#!/usr/bin/perl
#
# Repeatedly runs /usr/bin/procstack against a set of processes and prints
# the raw output, pausing between rounds.
#
# Options (each takes a value, stored by getopt() in $opt_u/$opt_r/$opt_t):
#   -u user          only sample processes listed by "ps -u user"
#   -r sample_count  number of sampling rounds (required)
#   -t sleep_time    seconds to wait between rounds (required)

use Getopt::Std;

getopt('urt');

unless ($opt_r && $opt_t){
    print "Usage: $0 [ -u user] -r sample_count -t sleep_time\n";
    exit(0);
}

my $i;
my @proc = "";
for ($i = 0; $i < $opt_r ; $i++){
    if ($opt_u){
        # Build the PID list from ps output for the requested user.
        $proc = `/usr/sysv/bin/ps -u $opt_u `;
        # Drop the first line of the ps output (presumably the column header).
        $proc =~ s/^.*\n//;
        # Reduce every remaining line to its first number followed by a space.
        # Uses $1 (not \1) on the replacement side, as perlre recommends.
        $proc =~ s/\s*(\d+).*\n/$1 /g;
        @proc = split(/\s+/,$proc);
    } else {
        # No user given: every all-digit entry under /proc is taken as a PID.
        opendir(my $dh, '/proc') || die "Can't open /proc: $!";
        @proc = grep { /^[\d]+$/ } readdir($dh);
        closedir ($dh);
    }

    foreach my $pid (@proc){
        my $command = "/usr/bin/procstack $pid";
        # stderr is discarded so processes that cannot be inspected are
        # skipped silently.
        print `$command 2>/dev/null`;
    }

    # Four-argument select() is used as a sleep that accepts fractions.
    select(undef, undef, undef, $opt_t);
}

View File

@@ -0,0 +1,115 @@
#!/usr/bin/perl -w
#
# difffolded.pl diff two folded stack files. Use this for generating
# flame graph differentials.
#
# USAGE: ./difffolded.pl [-hns] folded1 folded2 | ./flamegraph.pl > diff2.svg
#
# Options are described in the usage message (-h).
#
# The flamegraph will be colored based on higher samples (red) and smaller
# samples (blue). The frame widths will be based on the 2nd folded file.
# This might be confusing if stack frames disappear entirely; it will make
# the most sense to ALSO create a differential based on the 1st file widths,
# while switching the hues; eg:
#
# ./difffolded.pl folded2 folded1 | ./flamegraph.pl --negate > diff1.svg
#
# Here's what they mean when comparing a before and after profile:
#
# diff1.svg: widths show the before profile, colored by what WILL happen
# diff2.svg: widths show the after profile, colored by what DID happen
#
# INPUT: See stackcollapse* programs.
#
# OUTPUT: The full list of stacks, with two columns, one from each file.
# If a stack wasn't present in a file, the column value is zero.
#
# folded_stack_trace count_from_folded1 count_from_folded2
#
# eg:
#
# funca;funcb;funcc 31 33
# ...
#
# COPYRIGHT: Copyright (c) 2014 Brendan Gregg.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# (http://www.gnu.org/copyleft/gpl.html)
#
# 28-Oct-2014 Brendan Gregg Created this.
use strict;
use Getopt::Std;

# defaults
my $normalize = 0;	# make sample counts equal
my $striphex = 0;	# strip hex numbers

# Print the usage message to STDERR and exit with status 2.
sub usage {
	print STDERR <<USAGE_END;
USAGE: $0 [-hns] folded1 folded2 | flamegraph.pl > diff2.svg
    -h # help message
    -n # normalize sample counts
    -s # strip hex numbers (addresses)
See stackcollapse scripts for generating folded files.
Also consider flipping the files and hues to highlight reduced paths:
$0 folded2 folded1 | ./flamegraph.pl --negate > diff1.svg
USAGE_END
	exit 2;
}

# Read one folded-stack file and add its counts into $folded->{stack}{$slot}.
#
#   $file    path to read ("-" reads STDIN, as the former two-argument
#            open did)
#   $slot    column key to accumulate into (1 or 2)
#   $folded  reference to the shared stack => { slot => count } hash
#
# Returns the sum of all counts read. Dies if the file cannot be opened.
sub read_folded {
	my ($file, $slot, $folded) = @_;
	my $total = 0;

	my $fh;
	if ($file eq '-') {
		$fh = \*STDIN;
	} else {
		# Three-argument open: the name is never interpreted as a mode
		# or a pipe.
		open $fh, '<', $file or die "ERROR: Can't read $file\n";
	}

	while (<$fh>) {
		chomp;
		my ($stack, $count) = (/^(.*)\s+?(\d+(?:\.\d*)?)$/);
		# Skip lines that are not "stack count" (e.g. blank lines)
		# instead of accumulating under an undefined key.
		next unless defined $count;
		$stack =~ s/0x[0-9a-fA-F]+/0x.../g if $striphex;
		$folded->{$stack}{$slot} += $count;
		$total += $count;
	}

	close $fh unless $file eq '-';
	return $total;
}

our($opt_h, $opt_n, $opt_s);
# 'h' must be listed here, otherwise -h is reported as an unknown option.
getopts('hns') or usage();
usage() if $opt_h;
# Check the argument count only after the options have been removed from
# @ARGV, so "-n file1" is rejected instead of leaving $file2 undefined.
usage() if @ARGV < 2;
$normalize = 1 if defined $opt_n;
$striphex = 1 if defined $opt_s;

my %Folded;

my $file1 = $ARGV[0];
my $file2 = $ARGV[1];

my $total1 = read_folded($file1, 1, \%Folded);
my $total2 = read_folded($file2, 2, \%Folded);

foreach my $stack (keys %Folded) {
	$Folded{$stack}{1} = 0 unless defined $Folded{$stack}{1};
	$Folded{$stack}{2} = 0 unless defined $Folded{$stack}{2};
	# Scale column 1 so both columns sum to the same total. When the first
	# file had no samples every column-1 value is already 0 and scaling
	# would divide by zero, so it is skipped.
	if ($normalize && $total1 != $total2 && $total1 > 0) {
		$Folded{$stack}{1} = int($Folded{$stack}{1} * $total2 / $total1);
	}
	print "$stack $Folded{$stack}{1} $Folded{$stack}{2}\n";
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

After

Width:  |  Height:  |  Size: 152 KiB

File diff suppressed because it is too large Load Diff

After

Width:  |  Height:  |  Size: 395 KiB

View File

@@ -0,0 +1,62 @@
#!/usr/bin/perl -w
#
# files.pl Print file sizes in folded format, for a flame graph.
#
# This helps you understand storage consumed by a file system, by creating
# a flame graph visualization of space consumed. This is basically a Perl
# version of the "find" command, which emits in folded format for piping
# into flamegraph.pl.
#
# Copyright (c) 2017 Brendan Gregg.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 03-Feb-2017 Brendan Gregg Created this.
use strict;
use File::Find;

# Print the usage message to STDERR and exit with status 1.
sub usage {
	print STDERR "USAGE: $0 [--xdev] [DIRECTORY]...\n";
	print STDERR " eg, $0 /Users\n";
	# This line previously lacked its newline and ran into the next one.
	print STDERR " To not descend directories on other filesystems:\n";
	print STDERR " eg, $0 --xdev /\n";
	print STDERR "Intended to be piped to flamegraph.pl. Full example:\n";
	print STDERR " $0 /Users | flamegraph.pl " .
	    "--hash --countname=bytes > files.svg\n";
	print STDERR " $0 /usr /home /root /etc | flamegraph.pl " .
	    "--hash --countname=bytes > files.svg\n";
	print STDERR " $0 --xdev / | flamegraph.pl " .
	    "--hash --countname=bytes > files.svg\n";
	exit 1;
}

usage() if @ARGV == 0 or $ARGV[0] eq "--help" or $ARGV[0] eq "-h";

my $filter_xdev = 0;	# set once --xdev has been seen on the command line
my $xdev_id;		# device id of the directory currently being walked

foreach my $dir (@ARGV) {
	if ($dir eq "--xdev") {
		$filter_xdev = 1;
	} else {
		# Each starting directory defines its own filesystem; without
		# this reset a later directory on a different device would be
		# pruned in its entirety.
		$xdev_id = undef;
		find(\&wanted, $dir);
	}
}

# File::Find callback: print "folded;path size" for every entry visited.
# With --xdev active, entries on a device other than the one the walk
# started on are pruned and not printed.
sub wanted {
	my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size) = lstat($_);
	# lstat failed (e.g. the entry vanished or is unreadable): skip it.
	return unless defined $size;
	if ($filter_xdev) {
		# defined-test rather than truth-test, so a device id of 0 is
		# still remembered as the starting device.
		if (!defined $xdev_id) {
			$xdev_id = $dev;
		} elsif ($xdev_id ne $dev) {
			$File::Find::prune = 1;
			return;
		}
	}
	my $path = $File::Find::name;
	$path =~ tr/\//;/;		# delimiter
	$path =~ tr/;.a-zA-Z0-9-/_/c;	# ditch whitespace and other chars
	$path =~ s/^;//;
	print "$path $size\n";
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,104 @@
#!/bin/bash
#
# jmaps - creates java /tmp/perf-PID.map symbol maps for all java processes.
#
# This is a helper script that finds all running "java" processes, then executes
# perf-map-agent on them all, creating symbol map files in /tmp. These map files
# are read by perf_events (aka "perf") when doing system profiles (specifically,
# the "report" and "script" subcommands).
#
# USAGE: jmaps [-u]
# -u # unfoldall: include inlined symbols
#
# My typical workflow is this:
#
# perf record -F 99 -a -g -- sleep 30; jmaps
# perf script > out.stacks
# ./stackcollapse-perf.pl out.stacks | ./flamegraph.pl --color=java --hash > out.stacks.svg
#
# The stackcollapse-perf.pl and flamegraph.pl programs come from:
# https://github.com/brendangregg/FlameGraph
#
# REQUIREMENTS:
# Tune two environment settings below.
#
# 13-Feb-2015 Brendan Gregg Created this.
# 20-Feb-2017 " " Added -u for unfoldall.
JAVA_HOME=${JAVA_HOME:-/usr/lib/jvm/java-8-oracle}
AGENT_HOME=${AGENT_HOME:-/usr/lib/jvm/perf-map-agent}  # from https://github.com/jvm-profiling-tools/perf-map-agent
debug=0

# Precondition checks. Each failure is reported on stderr and exits
# non-zero; a bare "exit" here would return the status of the preceding
# echo (0) and make the failure look like success to callers.
if [[ "$USER" != root ]]; then
    echo "ERROR: not root user? exiting..." >&2
    exit 1
fi

if [[ ! -x "$JAVA_HOME" ]]; then
    echo "ERROR: JAVA_HOME not set correctly; edit $0 and fix" >&2
    exit 1
fi

if [[ ! -x "$AGENT_HOME" ]]; then
    echo "ERROR: AGENT_HOME not set correctly; edit $0 and fix" >&2
    exit 1
fi

# -u asks the agent for "unfoldall" (include inlined symbols); otherwise
# no extra agent option is passed.
opts=""
if [[ "$1" == "-u" ]]; then
    opts=unfoldall
fi

# figure out where the agent files are:
AGENT_OUT=""
AGENT_JAR=""
if [[ -e "$AGENT_HOME/out/attach-main.jar" ]]; then
    AGENT_JAR=$AGENT_HOME/out/attach-main.jar
elif [[ -e "$AGENT_HOME/attach-main.jar" ]]; then
    AGENT_JAR=$AGENT_HOME/attach-main.jar
fi
if [[ -e "$AGENT_HOME/out/libperfmap.so" ]]; then
    AGENT_OUT=$AGENT_HOME/out
elif [[ -e "$AGENT_HOME/libperfmap.so" ]]; then
    AGENT_OUT=$AGENT_HOME
fi
if [[ "$AGENT_OUT" == "" || "$AGENT_JAR" == "" ]]; then
    echo "ERROR: Missing perf-map-agent files in $AGENT_HOME. Check installation." >&2
    exit 1
fi

# Fetch map for all "java" processes
echo "Fetching maps for all java processes..."
for pid in $(pgrep -x java); do
    mapfile=/tmp/perf-$pid.map
    # Remove any stale map so the existence check below reflects this run.
    rm -f "$mapfile"

    cmd="cd $AGENT_OUT; $JAVA_HOME/bin/java -Xms32m -Xmx128m -cp $AGENT_JAR:$JAVA_HOME/lib/tools.jar net.virtualvoid.perf.AttachOnce $pid $opts"
    (( debug )) && echo "$cmd"

    # The attach command is run as the owner of the target process.
    user=$(ps ho user -p "$pid")
    group=$(ps ho group -p "$pid")
    if [[ "$user" != root ]]; then
        if [[ "$user" == [0-9]* ]]; then
            # UID only, likely GID too, run sudo with #UID:
            cmd="sudo -u '#'$user -g '#'$group sh -c '$cmd'"
        else
            cmd="sudo -u $user -g $group sh -c '$cmd'"
        fi
    fi

    echo "Mapping PID $pid (user $user):"
    if (( debug )); then
        time eval "$cmd"
    else
        eval "$cmd"
    fi

    if [[ -e "$mapfile" ]]; then
        # Hand the map to root and make it readable/writable by everyone.
        chown root "$mapfile"
        chmod 666 "$mapfile"
    else
        echo "ERROR: $mapfile not created." >&2
    fi

    echo "wc(1): $(wc "$mapfile")"
    echo
done

View File

@@ -0,0 +1,86 @@
#!/usr/bin/perl -w
#
# pkgsplit-perf.pl Split IP samples on package names "/", eg, Java.
#
# This is for the creation of Java package flame graphs. Example steps:
#
# perf record -F 199 -a -- sleep 30; ./jmaps
# perf script | ./pkgsplit-perf.pl | ./flamegraph.pl > out.svg
#
# Note that stack traces are not sampled (no -g), as we split Java package
# names into frames rather than stack frames.
#
# (jmaps is a helper script for automating perf-map-agent: Java symbol dumps.)
#
# The default output of "perf script" varies between kernel versions, so we'll
# need to deal with that here. I could make people use the perf script option
# to pick fields, so our input is static, but A) I prefer the simplicity of
# just saying: run "perf script", and B) the option to choose fields itself
# changed between kernel versions! -f became -F.
#
# Copyright 2017 Netflix, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 20-Sep-2016 Brendan Gregg Created this.
use strict;

my $include_pname = 1;	# include process names in stacks
my $include_pid = 0;	# include process ID with process name
my $include_tid = 0;	# include process & thread ID with process name

# Main loop: turn each "perf script" sample line into one folded line of the
# form "pname;pkg;pkg;Class;::method 1".
while (<>) {
	# filter comments
	next if /^#/;

	# filter idle events
	next if /xen_hypercall_sched_op|cpu_idle|native_safe_halt/;

	my ($pid, $tid, $pname);

	# Linux 3.13:
	#     java 13905 [000]  8048.096572: cpu-clock:      7fd781ac3053 Ljava/util/Arrays$ArrayList;::toArray (/tmp/perf-12149.map)
	#     java  8301 [050] 13527.392454: cycles:      7fa8a80d9bff Dictionary::find(int, unsigned int, Symbol*, ClassLoaderData*, Handle, Thread*) (/usr/lib/jvm/java-8-oracle-1.8.0.121/jre/lib/amd64/server/libjvm.so)
	#     java  4567/8603  [023] 13527.389886: cycles:      7fa863349895 Lcom/google/gson/JsonObject;::add (/tmp/perf-4567.map)
	#
	# Linux 4.8:
	#     java 30894 [007] 452884.077440:   10101010 cpu-clock:      7f0acc8eff67 Lsun/nio/ch/SocketChannelImpl;::read+0x27 (/tmp/perf-30849.map)
	#     bash 26858/26858 [006] 5440237.995639: cpu-clock:            433573 [unknown] (/bin/bash)
	#
	if (/^\s+(\S.+?)\s+(\d+)\/*(\d+)*\s.*?:.*:/) {
		# parse process name and pid/tid
		# defined-test, so a "pid/0" pair is still recognised as
		# having both a pid and a tid.
		if (defined $3) {
			($pid, $tid) = ($2, $3);
		} else {
			($pid, $tid) = ("?", $2);
		}

		if ($include_tid) {
			$pname = "$1-$pid/$tid";
		} elsif ($include_pid) {
			$pname = "$1-$pid";
		} else {
			$pname = $1;
		}
		$pname =~ tr/ /_/;
	} else {
		# not a match
		next;
	}

	# parse rest of line
	s/^.*?:.*?:\s+//;
	s/ \(.*?\)$//;
	chomp;
	my ($addr, $func) = split(' ', $_, 2);
	# A sample with no symbol text leaves $func undefined; use an empty
	# name so the output is unchanged but no "uninitialized" warnings
	# are raised under -w.
	$func = "" unless defined $func;

	# strip Java's leading "L"
	$func =~ s/^L//;

	# replace numbers with X
	$func =~ s/[0-9]/X/g;

	# semicolon delimited: package separators become frame separators
	$func =~ s:/:;:g;
	print "$pname;$func 1\n";
}

View File

@@ -0,0 +1,137 @@
#!/usr/bin/perl -w
#
# range-perf Extract a time range from Linux "perf script" output.
#
# USAGE EXAMPLE:
#
# perf record -F 100 -a -- sleep 60
# perf script | ./range-perf.pl 10 20 # range 10 to 20 seconds only
# perf script | ./range-perf.pl 0 0.5 # first half second only
#
# MAKING A SERIES OF FLAME GRAPHS:
#
# Let's say you had the output of "perf script" in a file, out.stacks01, which
# was for a 180 second profile. The following command creates a series of
# flame graphs for each 10 second interval:
#
# for i in `seq 0 10 170`; do cat out.stacks01 | \
# ./range-perf.pl $i $((i + 10)) | ./stackcollapse-perf.pl | \
# grep -v cpu_idle | ./flamegraph.pl --hash --color=java \
# --title="range $i $((i + 10))" > out.range_$i.svg; echo $i done; done
#
# In that example, I used "--color=java" for the Java palette, and excluded
# the idle CPU task. Customize as needed.
#
# Copyright 2017 Netflix, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 21-Feb-2017 Brendan Gregg Created this.
use strict;
use Getopt::Long;
use POSIX 'floor';

# Print the usage message and terminate (die writes to STDERR and exits
# non-zero), so this never returns to the caller.
sub usage {
	die <<USAGE_END;
USAGE: $0 [options] min_seconds max_seconds
    --timeraw # use raw timestamps from perf
    --timezerosecs # time starts at 0 secs, but keep offset from perf
eg,
    $0 10 20 # only include samples between 10 and 20 seconds
USAGE_END
}

my $timeraw = 0;
my $timezerosecs = 0;
my $help = 0;
GetOptions(
	'timeraw'      => \$timeraw,
	'timezerosecs' => \$timezerosecs,
	# Declared so -h/--help reach usage() without Getopt::Long first
	# printing an "Unknown option" warning.
	'help|h'       => \$help,
) or usage();

# The literal -h/--help comparison is kept for the case where the flag
# arrives as a positional argument (e.g. after "--").
if ($help || @ARGV < 2 || $ARGV[0] eq "-h" || $ARGV[0] eq "--help") {
	usage();
}

my $begin = $ARGV[0];	# lower bound of the time range to print
my $end = $ARGV[1];	# upper bound of the time range to print
#
# Parsing
#
# IP only examples:
#
# java 52025 [026] 99161.926202: cycles:
# java 14341 [016] 252732.474759: cycles: 7f36571947c0 nmethod::is_nmethod() const (/...
# java 14514 [022] 28191.353083: cpu-clock: 7f92b4fdb7d4 Ljava_util_List$size$0;::call (/tmp/perf-11936.map)
# swapper 0 [002] 6035557.056977: 10101010 cpu-clock: ffffffff810013aa xen_hypercall_sched_op+0xa (/lib/modules/4.9-virtual/build/vmlinux)
# bash 25370 603are 6036.991603: 10101010 cpu-clock: 4b931e [unknown] (/bin/bash)
# bash 25370/25370 6036036.799684: cpu-clock: 4b913b [unknown] (/bin/bash)
# other combinations are possible.
#
# Stack examples (-g):
#
# swapper 0 [021] 28648.467059: cpu-clock:
# ffffffff810013aa xen_hypercall_sched_op ([kernel.kallsyms])
# ffffffff8101cb2f default_idle ([kernel.kallsyms])
# ffffffff8101d406 arch_cpu_idle ([kernel.kallsyms])
# ffffffff810bf475 cpu_startup_entry ([kernel.kallsyms])
# ffffffff81010228 cpu_bringup_and_idle ([kernel.kallsyms])
#
# java 14375 [022] 28648.467079: cpu-clock:
# 7f92bdd98965 Ljava/io/OutputStream;::write (/tmp/perf-11936.map)
# 7f8808cae7a8 [unknown] ([unknown])
#
# swapper 0 [005] 5076.836336: cpu-clock:
# ffffffff81051586 native_safe_halt ([kernel.kallsyms])
# ffffffff8101db4f default_idle ([kernel.kallsyms])
# ffffffff8101e466 arch_cpu_idle ([kernel.kallsyms])
# ffffffff810c2b31 cpu_startup_entry ([kernel.kallsyms])
# ffffffff810427cd start_secondary ([kernel.kallsyms])
#
# swapper 0 [002] 6034779.719110: 10101010 cpu-clock:
# 2013aa xen_hypercall_sched_op+0xfe20000a (/lib/modules/4.9-virtual/build/vmlinux)
# a72f0e default_idle+0xfe20001e (/lib/modules/4.9-virtual/build/vmlinux)
# 2392bf arch_cpu_idle+0xfe20000f (/lib/modules/4.9-virtual/build/vmlinux)
# a73333 default_idle_call+0xfe200023 (/lib/modules/4.9-virtual/build/vmlinux)
# 2c91a4 cpu_startup_entry+0xfe2001c4 (/lib/modules/4.9-virtual/build/vmlinux)
# 22b64a cpu_bringup_and_idle+0xfe20002a (/lib/modules/4.9-virtual/build/vmlinux)
#
# bash 25370/25370 6035935.188539: cpu-clock:
# b9218 [unknown] (/bin/bash)
# 2037fe8 [unknown] ([unknown])
# other combinations are possible.
#
# This regexp matches the event line, and puts time in $1, and the event name
# in $2:
#
my $event_regexp = qr/ +([0-9\.]+): *\S* *(\S+):/;

my $line;
my $start;	# timestamp of the first event line; undef until one is seen
my $ok = 0;	# becomes 1 once the lower bound has been reached
my $time;

# Copy STDIN to STDOUT from the first event whose time is >= $begin, and
# stop at the first event whose time is > $end.
while (1) {
	$line = <STDIN>;
	last unless defined $line;
	next if $line =~ /^#/;		# skip comments

	if ($line =~ $event_regexp) {
		# Only the timestamp ($1) is needed; the regexp's second group
		# (event name) is not used here.
		my $ts = $1;
		# undef is the "not yet set" marker, so a first timestamp of
		# exactly 0 is kept as the start instead of being overwritten
		# by the next event.
		$start = $ts unless defined $start;

		if ($timezerosecs) {
			$time = $ts - floor($start);
		} elsif (!$timeraw) {
			$time = $ts - $start;
		} else {
			$time = $ts;	# raw times
		}

		$ok = 1 if $time >= $begin;
		# assume samples are in time order:
		exit if $time > $end;
	}

	# Non-event lines (stack frames, blanks) follow the state set by the
	# most recent event line.
	print $line if $ok;
}

View File

@@ -0,0 +1,21 @@
#!/bin/bash
#
# record-test.sh - Overwrite flame graph test result files.
#
# See test.sh, which checks these resulting files.
#
# Currently only tests stackcollapse-perf.pl.
set -v -x

# ToDo: add some form of --inline, and --inline --context tests. These are
# tricky since they use addr2line, whose output will vary based on the test
# system's binaries and symbol tables.

# For every option and every test input, regenerate the expected-result file
# test/results/<name>-collapsed-<opt>.txt from stackcollapse-perf.pl's output.
for opt in pid tid kernel jit all addrs; do
	for testfile in test/*.txt; do
		# An unmatched glob is left as the literal pattern; do not run the
		# collapser on (or create a result file for) a non-existent input.
		[ -e "$testfile" ] || continue

		echo testing "$testfile" : "$opt"
		outfile=${testfile#*/}
		outfile="test/results/${outfile%.txt}-collapsed-${opt}.txt"
		./stackcollapse-perf.pl --"${opt}" "${testfile}" 2> /dev/null > "$outfile"
	done
done

View File

@@ -0,0 +1,61 @@
#!/usr/bin/perl -ws
#
# stackcollapse-aix Collapse AIX /usr/bin/procstack backtraces
#
# Parse a list of backtraces as generated with the poor man's aix-perf.pl
# profiler
#
use strict;
my $process = "";	# program name from the most recent "<pid>: <name>" line
my $current = "";	# stack being assembled, outermost frame first, ';'-joined
my %stacks;		# collapsed stack => number of times it was seen

# Count the stack assembled so far (if any) under the current process name,
# then start a fresh stack.
sub flush_stack {
	return if $current eq "";
	$stacks{$process . ";" . $current} += 1;
	$current = "";
}

while (<>) {
	chomp;
	if (m/^\d+:(?: ([^ ]*))?/) {
		# Process header.  The name is optional in the pattern so that a
		# header without one yields "" rather than an undefined value.
		flush_stack();
		$process = defined $1 ? $1 : "";
	}
	elsif (m/^---------- tid# \d+/) {
		# Thread separator: the previous thread's stack is complete.
		flush_stack();
	}
	elsif (m/^(0x[0-9abcdef]*) *([^ ]*) ([^ ]*) ([^ ]*)/) {
		my ($alt, $function) = ($1, $2);
		# Drop everything from the opening parenthesis onwards.
		$function =~ s/\(.*\)?//;
		# A bracketed placeholder carries no name; use the address instead.
		$function = $alt if $function =~ /^\[.*\]$/;
		# Each new line is prepended to the stack built so far.
		$current = $current eq "" ? $function : $function . ";" . $current;
	}
}
flush_stack();

foreach my $k (sort { $a cmp $b } keys %stacks) {
	print "$k $stacks{$k}\n";
}

View File

@@ -0,0 +1,72 @@
#!/usr/bin/perl -w
#
# stackcollapse-bpftrace.pl collapse bpftrace samples into single lines.
#
# USAGE ./stackcollapse-bpftrace.pl infile > outfile
#
# Example input:
#
# @[
# _raw_spin_lock_bh+0
# tcp_recvmsg+808
# inet_recvmsg+81
# sock_recvmsg+67
# sock_read_iter+144
# new_sync_read+228
# __vfs_read+41
# vfs_read+142
# sys_read+85
# do_syscall_64+115
# entry_SYSCALL_64_after_hwframe+61
# ]: 3
#
# Example output:
#
# entry_SYSCALL_64_after_hwframe+61;do_syscall_64+115;sys_read+85;vfs_read+142;__vfs_read+41;new_sync_read+228;sock_read_iter+144;sock_recvmsg+67;inet_recvmsg+81;tcp_recvmsg+808;_raw_spin_lock_bh+0 3
#
# Copyright 2018 Peter Sanford. All rights reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# (http://www.gnu.org/copyleft/gpl.html)
#
use strict;
my @stack;		# frames of the stack being read, in input order
my $in_stack = 0;	# true between a "@[" line and its closing "]: N" line

# Read the input one line at a time; "foreach (<>)" would first load the
# entire input into memory.
while (<>) {
	chomp;
	if (!$in_stack) {
		if (/^@\[$/) {
			# Opening line of a multi-line stack.
			$in_stack = 1;
		} elsif (/^@\[,\s(.*)\]: (\d+)/) {
			# Single-line entry: print the text after "@[, " with its count.
			print $1 . " $2\n";
		}
	} else {
		if (m/^,?\s?(.*)\]: (\d+)/) {
			# Closing line; it may carry one more element before "]".
			if (length $1) {
				push(@stack, $1);
			}
			# Input order is reversed on output.
			print join(';', reverse(@stack)) . " $2\n";
			$in_stack = 0;
			@stack = ();
		} else {
			# A frame line: strip its indentation and remember it.
			$_ =~ s/^\s+//;
			push(@stack, $_);
		}
	}
}

View File

@@ -0,0 +1,144 @@
#!/usr/bin/python
#
# stackcollapse-chrome-tracing.py collapse Trace Event Format [1]
# callstack events into single lines.
#
# [1] https://github.com/catapult-project/catapult/wiki/Trace-Event-Format
#
# USAGE: ./stackcollapse-chrome-tracing.py input_json [input_json...] > outfile
#
# Example input:
#
# {"traceEvents":[
# {"pid":1,"tid":2,"ts":0,"ph":"X","name":"Foo","dur":50},
# {"pid":1,"tid":2,"ts":10,"ph":"X","name":"Bar","dur":30}
# ]}
#
# Example output:
#
# Foo 20.0
# Foo;Bar 30.0
#
# Input may contain many stack trace events from many processes/threads.
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at docs/cddl1.txt or
# http://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at docs/cddl1.txt.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
# 4-Jan-2018 Marcin Kolny Created this.
import argparse
import json
# Output table filled by save_stack(): maps a ';'-joined chain of event labels
# to the summed total_duration of the events that ended that chain.
stack_identifiers = {}
class Event:
    """A labelled span of time: a start timestamp plus a duration."""

    def __init__(self, label, timestamp, dur):
        self.label, self.timestamp = label, timestamp
        # total_duration starts out equal to the duration; it is kept as a
        # separate attribute so it can be adjusted without losing the
        # original span length.
        self.duration = self.total_duration = dur

    def get_stop_timestamp(self):
        """Return the timestamp at which this event ends."""
        return self.timestamp + self.duration
def cantor_pairing(a, b):
    """Combine two non-negative integers into one integer (Cantor pairing).

    Floor division keeps the result an exact integer.  With true division
    (Python 3 "/") the result is a float, which cannot represent large values
    exactly, so distinct (a, b) pairs could map to the same key.
    s * (s + 1) is always even, so "//" loses nothing.
    """
    s = a + b
    return s * (s + 1) // 2 + b
def get_trace_events(trace_file, events_dict):
    """Load one trace file and append its complete events to events_dict.

    trace_file  -- open file object containing Trace Event Format JSON.
                   Both layouts are accepted: the JSON Object Format
                   ({"traceEvents": [...]}) and the JSON Array Format, where
                   the top-level value is the event list itself.
    events_dict -- updated in place; maps cantor_pairing(tid, pid) to the
                   list of Event objects seen for that thread.

    Only events with phase 'X' that carry a 'dur' field are kept; everything
    else is skipped.
    """
    json_data = json.load(trace_file)
    if isinstance(json_data, list):
        entries = json_data
    else:
        entries = json_data['traceEvents']

    for entry in entries:
        if entry.get('ph') != 'X' or 'dur' not in entry:
            continue
        key = cantor_pairing(int(entry['tid']), int(entry['pid']))
        events_dict.setdefault(key, []).append(
            Event(entry['name'], float(entry['ts']), float(entry['dur'])))
def load_events(trace_files):
    """Read every trace file and return its events grouped per thread key.

    The result is the dictionary filled by get_trace_events(); each list in
    it is sorted by ascending event timestamp.
    """
    events = {}
    for handle in trace_files:
        get_trace_events(handle, events)
    for per_thread in events.values():
        per_thread.sort(key=lambda ev: ev.timestamp)
    return events
def save_stack(stack):
    """Add the last event's total_duration to the entry for this stack.

    The key is the ';'-joined labels of all events in `stack`.  Nothing is
    recorded for an empty stack.
    """
    labels = []
    leaf = None
    for leaf in stack:
        labels.append(leaf.label)
    if not leaf:
        return

    identifier = ';'.join(labels)
    if identifier not in stack_identifiers:
        stack_identifiers[identifier] = leaf.total_duration
    else:
        stack_identifiers[identifier] += leaf.total_duration
def load_stack_identifiers(events):
    """Replay one thread's events and record every stack via save_stack().

    `events` is walked in the order given.  A list of currently open events
    is maintained: before a new event is pushed, every open event that has
    already stopped is saved and popped, and the new event's duration is
    subtracted from the total_duration of the event it nests under.
    """
    open_events = []
    for current in events:
        # Close everything that ended at or before this event's start.
        while open_events:
            if not (open_events[-1].get_stop_timestamp() <= current.timestamp):
                break
            save_stack(open_events)
            open_events.pop()
        if open_events:
            open_events[-1].total_duration -= current.duration
        open_events.append(current)

    # Input exhausted: unwind whatever is still open.
    while open_events:
        save_stack(open_events)
        del open_events[-1]
# Command line: one or more trace files, opened for reading by argparse.
parser = argparse.ArgumentParser()
parser.add_argument(
    'input_file',
    nargs='+',
    type=argparse.FileType('r'),
    help='Chrome Tracing input files',
)
args = parser.parse_args()

# Collapse each thread's events, then print one "stack duration" line per
# distinct stack.
all_events = load_events(args.input_file)
for thread_events in all_events.values():
    load_stack_identifiers(thread_events)

for identifiers, duration in stack_identifiers.items():
    print(' '.join((identifiers, str(duration))))

View File

@@ -0,0 +1,98 @@
#!/usr/bin/perl -w
#
# stackcollapse-elfutils Collapse elfutils stack (eu-stack) backtraces
#
# Parse a list of elfutils backtraces as generated with the poor man's
# profiler [1]:
#
# for x in $(seq 1 "$nsamples"); do
# eu-stack -p "$pid" "$@"
# sleep "$sleeptime"
# done
#
# [1] http://poormansprofiler.org/
#
# Copyright 2014 Gabriel Corona. All rights reserved.
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at docs/cddl1.txt or
# http://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at docs/cddl1.txt.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
use strict;
use Getopt::Long;
# Command-line switches: when set, add_current() prefixes each collapsed
# stack with "PID=<pid>;" and/or "TID=<tid>;".
my $with_pid = 0;
my $with_tid = 0;
# Parse the options; on a parsing error print the usage text and abort.
GetOptions('pid' => \$with_pid,
'tid' => \$with_tid)
or die <<USAGE_END;
USAGE: $0 [options] infile > outfile\n
--pid # include PID
--tid # include TID
USAGE_END
# Parser state.
my $pid = ""; # value from the most recent "PID <n>" line
my $tid = ""; # value from the most recent "TID <n>" line
my $current = ""; # stack being assembled, ';'-joined
# NOTE(review): $previous_function is not referenced in the visible script.
my $previous_function = "";
my %stacks; # collapsed stack => number of occurrences
# Count the stack held in $current (if any), after prefixing it with the
# thread and/or process ID when those options are on, then reset $current.
sub add_current {
	return if $current eq "";

	$current = "TID=$tid;$current" if $with_tid;
	$current = "PID=$pid;$current" if $with_pid;
	$stacks{$current}++;
	$current = "";
}
# Walk the eu-stack output.  "PID"/"TID" header lines close the stack in
# progress; "#N 0x... name" lines add one frame to it.
while (defined(my $line = <>)) {
	chomp $line;

	if ($line =~ m/^PID ([0-9]*)/) {
		add_current();
		$pid = $1;
		next;
	}
	if ($line =~ m/^TID ([0-9]*)/) {
		add_current();
		$tid = $1;
		next;
	}

	# Anything that is not a frame line is ignored.
	next unless $line =~ m/^#[0-9]* *0x[0-9a-f]*/;

	# A frame without a symbol after the address is reported as [unknown].
	my $frame = $line =~ m/^#[0-9]* *0x[0-9a-f]* (.*)/ ? $1 : "[unknown]";
	$current = $current eq "" ? $frame : "$frame;$current";
}
add_current();

for my $stack (sort keys %stacks) {
	print "$stack $stacks{$stack}\n";
}

View File

@@ -0,0 +1,61 @@
#!/usr/bin/perl -ws
#
# stackcollapse-faulthandler Collapse Python faulthandler backtraces
#
# Parse a list of Python faulthandler backtraces as generated with
# faulthandler.dump_traceback_later.
#
# Copyright 2014 Gabriel Corona. All rights reserved.
# Copyright 2017 Jonathan Kolb. All rights reserved.
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at docs/cddl1.txt or
# http://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at docs/cddl1.txt.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
use strict;
my $current = "";	# stack being assembled, ';'-joined
my %stacks;		# collapsed stack => number of occurrences

while (<>) {
	chomp;
	if (m/^Thread/) {
		# A thread header starts a new stack.
		$current = "";
	}
	# faulthandler indents frame lines; accept any amount of leading
	# whitespace rather than exactly one space.
	elsif (m/^\s+File "([^"]*)", line ([0-9]*) in (.*)/) {
		my $function = join(":", $1, $2, $3);	# file:line:function
		# Each new frame line is prepended to the stack built so far.
		$current = $current eq "" ? $function : $function . ";" . $current;
	}
	elsif ($current ne "") {
		# Any other line ends the stack in progress.
		$stacks{$current} += 1;
		$current = "";
	}
}

# Input may end without a terminating line.
if ($current ne "") {
	$stacks{$current} += 1;
	$current = "";
}

foreach my $k (sort { $a cmp $b } keys %stacks) {
	print "$k $stacks{$k}\n";
}

View File

@@ -0,0 +1,72 @@
#!/usr/bin/perl -ws
#
# stackcollapse-gdb Collapse GDB backtraces
#
# Parse a list of GDB backtraces as generated with the poor man's
# profiler [1]:
#
# for x in $(seq 1 500); do
# gdb -ex "set pagination 0" -ex "thread apply all bt" -batch -p $pid 2> /dev/null
# sleep 0.01
# done
#
# [1] http://poormansprofiler.org/
#
# Copyright 2014 Gabriel Corona. All rights reserved.
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at docs/cddl1.txt or
# http://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at docs/cddl1.txt.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
use strict;
my $current = "";		# stack being assembled, ';'-joined
my $previous_function = "";	# not used below
my %stacks;			# collapsed stack => number of occurrences

while (defined(my $line = <>)) {
	chomp $line;

	# Thread header: start over with an empty stack.
	if ($line =~ m/^Thread/) {
		$current = "";
		next;
	}

	# Frame line: "#N" followed by four space-separated fields.
	if ($line =~ m/^#[0-9]* *([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*)/) {
		my ($first, $third) = ($1, $3);
		# When the first field contains "0x" the third field is used as
		# the function; otherwise the first field itself is.
		my $function = $first =~ /0x[a-zA-Z0-9]*/ ? $third : $first;
		$current = $current eq "" ? $function : $function . ";" . $current;
		next;
	}

	# Any other line closes the stack in progress.
	next if $current eq "";
	$stacks{$current} += 1;
	$current = "";
}

unless ($current eq "") {
	$stacks{$current} += 1;
	$current = "";
}

for my $stack (sort keys %stacks) {
	print "$stack $stacks{$stack}\n";
}

View File

@@ -0,0 +1,150 @@
#!/usr/bin/perl -w
#
# stackcollapse-go.pl collapse golang samples into single lines.
#
# Parses golang samples generated by "go tool pprof" and outputs stacks as
# single lines, with methods separated by semicolons, and then a space and an
# occurrence count. For use with flamegraph.pl.
#
# USAGE: ./stackcollapse-go.pl infile > outfile
#
# Example Input:
# ...
# Samples:
# samples/count cpu/nanoseconds
# 1 10000000: 1 2
# 2 10000000: 3 2
# 1 10000000: 4 2
# ...
# Locations
# 1: 0x58b265 scanblock :0 s=0
# 2: 0x599530 GC :0 s=0
# 3: 0x58a999 flushptrbuf :0 s=0
# 4: 0x58d6a8 runtime.MSpan_Sweep :0 s=0
# ...
# Mappings
# ...
#
# Example Output:
#
# GC;flushptrbuf 2
# GC;runtime.MSpan_Sweep 1
# GC;scanblock 1
#
# Input may contain many stacks as generated from go tool pprof:
#
# go tool pprof -seconds=60 -raw -output=a.pprof http://$ADDR/debug/pprof/profile
#
# For format of text profile, See golang/src/internal/pprof/profile/profile.go
#
# Copyright 2017 Sijie Yang (yangsijie@baidu.com). All rights reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# (http://www.gnu.org/copyleft/gpl.html)
#
# 16-Jan-2017 Sijie Yang Created this.
use strict;
use Getopt::Long;
# tunables
my $help = 0; # set by --help
# Print the usage text and terminate (via die).
sub usage {
die <<USAGE_END;
USAGE: $0 infile > outfile\n
USAGE_END
}
# Unknown options and --help both end in usage().
GetOptions(
'help' => \$help,
) or usage();
$help && usage();
# internals
my $state = "ignore"; # input section: "ignore", "sample", "location" or "mapping"
my %stacks; # space-separated location IDs => summed sample count
my %frames; # location ID => name from the Locations section
my %collapsed; # formatted stack (format_statck output) => summed count
# Add $count samples to the tally kept for the raw stack $stack.
sub remember_stack {
	my $stack = shift;
	my $count = shift;
	$stacks{$stack} += $count;
}
#
# Output stack string in required format. For example, for the following samples,
# format_statck() would return GC;runtime.MSpan_Sweep for stack "4 2"
#
# Locations
# 1: 0x58b265 scanblock :0 s=0
# 2: 0x599530 GC :0 s=0
# 3: 0x58a999 flushptrbuf :0 s=0
# 4: 0x58d6a8 runtime.MSpan_Sweep :0 s=0
#
# Turn a space-separated list of location IDs into a ';'-joined stack in
# reverse order.  IDs with a (true) entry in %frames are replaced by that
# name; all others are left as they are.
sub format_statck {
	my ($stack) = @_;
	my @names = map { $frames{$_} ? $frames{$_} : $_ } split(/ /, $stack);
	return join(";", reverse @names);
}
# Parse the text profile section by section.  Lines are read one at a time;
# "foreach (<>)" would first load the entire profile into memory.
while (<>) {
	next if m/^#/;
	chomp;
	if ($state eq "ignore") {
		# Skip everything until the Samples section starts.
		$state = "sample" if /Samples:/;
	} elsif ($state eq "sample") {
		if (/^\s*([0-9]+)\s*[0-9]+: ([0-9 ]+)/) {
			# $1 = sample count, $2 = list of location IDs
			remember_stack($2, $1);
		} elsif (/Locations/) {
			$state = "location";
		}
	} elsif ($state eq "location") {
		if (/^\s*([0-9]*): 0x[0-9a-f]+ (M=[0-9]+ )?([^ ]+) .*/) {
			# $1 = location ID, $3 = name
			$frames{$1} = $3;
		} elsif (/Mappings/) {
			# Nothing after this point is needed.
			$state = "mapping";
			last;
		}
	}
}

# Different raw ID lists can format to the same stack, so sum them.
foreach my $k (keys %stacks) {
	my $stack = format_statck($k);
	my $count = $stacks{$k};
	$collapsed{$stack} += $count;
}

foreach my $k (sort { $a cmp $b } keys %collapsed) {
	print "$k $collapsed{$k}\n";
}

View File

@@ -0,0 +1,145 @@
#!/usr/bin/perl -w
#
# stackcollapse-ibmjava.pl collapse jstack samples into single lines.
#
# Parses Java stacks generated by IBM Java with methods separated by semicolons,
# and then a space and an occurrence count.
#
# USAGE: ./stackcollapse-ibmjava.pl infile > outfile
#
# Example input:
#
# NULL
# 1XMTHDINFO Thread Details
# NULL
# NULL
# 3XMTHREADINFO "Default Executor-thread-149164" J9VMThread:0x0000000008132B00, j9thread_t:0x000000001A810B90, java/lang/Thread:0x0000000712BE8E48, state:R, prio=5
# 3XMJAVALTHREAD (java/lang/Thread getId:0x3493E, isDaemon:true)
# 3XMTHREADINFO1 (native thread ID:0x3158, native priority:0x5, native policy:UNKNOWN, vmstate:R, vm thread flags:0x00000001)
# 3XMCPUTIME CPU usage total: 0.421875000 secs, user: 0.343750000 secs, system: 0.078125000 secs, current category="Application"
# 3XMHEAPALLOC Heap bytes allocated since last GC cycle=0 (0x0)
# 3XMTHREADINFO3 Java callstack:
# 4XESTACKTRACE at java/net/SocketInputStream.socketRead0(Native Method)
# 4XESTACKTRACE at java/net/SocketInputStream.socketRead(SocketInputStream.java:127(Compiled Code))
# 4XESTACKTRACE at java/net/SocketInputStream.read(SocketInputStream.java:182(Compiled Code))
# 4XESTACKTRACE at java/net/SocketInputStream.read(SocketInputStream.java:152(Compiled Code))
# 4XESTACKTRACE at java/io/FilterInputStream.read(FilterInputStream.java:144(Compiled Code))
# ...
# 4XESTACKTRACE at java/lang/Thread.run(Thread.java:785(Compiled Code))
#
# Example output:
#
# Default Executor-thread-149164;java/lang/Thread.run;java/net/SocketInputStream.read;java/net/SocketInputStream.socketRead0 1
#
#
# Copyright 2014 Federico Juinio. All rights reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# (http://www.gnu.org/copyleft/gpl.html)
#
# 23-Aug-2023 Federico Juinio created this based from stackcollapse-jstack.pl
use strict;
use Getopt::Long;
# tunables
my $include_tname = 1; # include thread names in stacks
my $include_tid = 0; # include thread IDs in stacks
my $shorten_pkgs = 0; # shorten package names
my $help = 0; # set by --help
# Print the usage text and terminate (via die).
sub usage {
die <<USAGE_END;
USAGE: $0 [options] infile > outfile\n
--include-tname
--no-include-tname # include/omit thread names in stacks (default: include)
--include-tid
--no-include-tid # include/omit thread IDs in stacks (default: omit)
--shorten-pkgs
--no-shorten-pkgs # (don't) shorten package names (default: don't shorten)
eg,
$0 --no-include-tname stacks.txt > collapsed.txt
USAGE_END
}
# The "!" suffix gives each switch the --no-<name> form listed in the usage
# text.  Unknown options and --help both end in usage().
GetOptions(
'include-tname!' => \$include_tname,
'include-tid!' => \$include_tid,
'shorten-pkgs!' => \$shorten_pkgs,
'help' => \$help,
) or usage();
$help && usage();
# internals
my %collapsed;		# collapsed stack => number of occurrences

# Add the given count (second argument) to the tally for the given
# collapsed stack (first argument).
sub remember_stack {
	$collapsed{$_[0]} += $_[1];
}
my @stack;		# frames of the current thread, outermost first
my $tname;		# label put in front of the stack (undef = none)
my $state = "?";	# thread state from the header line

# Read the javacore one line at a time ("foreach (<>)" would slurp it).
# The record tag and the text after it are matched with \s+ so that any
# amount of column padding between them is accepted.
while (<>) {
	next if m/^#/;
	chomp;

	if (m/^3XMTHREADINFO3\s+Native callstack:/) {
		# The Java stack of this thread is complete: save it.
		unshift @stack, $tname if defined $tname;
		remember_stack(join(";", @stack), 1) if @stack;
		@stack = ();
		undef $tname;
		$state = "?";
		next;
	}

	# look for thread header line and parse thread name and state
	if (/^3XMTHREADINFO\s+"([^"]*).* state:(.*), /) {
		$tname = $1 if $include_tname;
		$state = $2;

	# special handling for "Anonymous native threads"
	} elsif (/3XMTHREADINFO\s+Anonymous native thread/) {
		$tname = "Anonymous native thread";

	# look for thread id
	} elsif (/^3XMTHREADINFO1\s+\(native thread ID:([^ ]*), native priority/) {
		if ($include_tname && $include_tid) {
			# No header may have been seen; treat a missing name as "".
			$tname = (defined $tname ? $tname : "") . "-" . $1;
		}

	# collect stack frames
	} elsif (/^4XESTACKTRACE\s+at ([^\(]*)/) {
		my $func = $1;
		# Shorten each package component to its first letter.  Frames here
		# separate packages with "/" (java/net/Foo.bar); the dotted form is
		# still accepted.  A frame matching neither form is left untouched
		# instead of being replaced by an empty string.
		if ($shorten_pkgs &&
		    ($func =~ m{^(.*/)([^/]+)$} || $func =~ m/(.*\.)([^.]+\.[^.]+)$/)) {
			my ($pkgs, $clsFunc) = ($1, $2);
			$pkgs =~ s/(\w)\w*/$1/g;
			$func = $pkgs . $clsFunc;
		}
		unshift @stack, $func;
	}
}

foreach my $k (sort { $a cmp $b } keys %collapsed) {
	print "$k $collapsed{$k}\n";
}

View File

@@ -0,0 +1,34 @@
#!/usr/bin/perl -w
#
# stackcollapse-instruments.pl
#
# Parses a file containing a call tree as produced by XCode Instruments
# (Edit > Deep Copy) and produces output suitable for flamegraph.pl.
#
# USAGE: ./stackcollapse-instruments.pl infile > outfile
use strict;
my @stack = ();		# $stack[$d] = most recent function seen at depth $d

<>;			# discard the first (header) line

# Read the call tree one line at a time ("foreach (<>)" would slurp it).
while (<>) {
	chomp;
	# $1/$2 = time value and unit, $3 = indentation, $4 = function
	/\d+\.\d+ (?:min|s|ms)\s+\d+\.\d+%\s+(\d+(?:\.\d+)?) (min|s|ms)\t \t(\s*)(.+)/
		or die "$0: unrecognized call tree line $.: $_\n";
	my ($value, $unit, $indent, $func) = ($1, $2, $3, $4);

	# The indentation width is the depth in the tree.
	my $depth = length($indent);
	$stack[$depth] = $func;

	# Normalise the time to milliseconds.
	my $time = 0 + $value;
	if ($unit eq "min") {
		$time *= 60*1000;
	} elsif ($unit eq "s") {
		$time *= 1000;
	}

	# Ancestors first, each followed by ";", then this function.
	my $prefix = join("", map { $_ . ";" } @stack[0 .. $depth - 1]);
	printf("%s%s %.0f\n", $prefix, $func, $time);
}

View File

@@ -0,0 +1,72 @@
#!/usr/bin/perl -w
#
# stackcollapse-java-exceptions.pl collapse java exceptions (found in logs) into single lines.
#
# Parses Java error stacks found in a log file and outputs them as
# single lines, with methods separated by semicolons, and then a space and an
# occurrence count. Inspired by stackcollapse-jstack.pl except that it does
# not act as a performance profiler.
#
# It can be useful if a Java process dumps a lot of different stacks in its logs
# and you want to quickly identify the biggest culprits.
#
# USAGE: ./stackcollapse-java-exceptions.pl infile > outfile
#
# Copyright 2018 Paul de Verdiere. All rights reserved.
use strict;
use Getopt::Long;
# tunables
my $shorten_pkgs = 0; # shorten package names
my $no_pkgs = 0; # really shorten package names!!
my $help = 0; # set by --help
# Print the usage text and terminate (via die).
sub usage {
die <<USAGE_END;
USAGE: $0 [options] infile > outfile\n
--shorten-pkgs : shorten package names
--no-pkgs : suppress package names (makes SVG much more readable)
USAGE_END
}
# Unknown options and --help both end in usage().
GetOptions(
'shorten-pkgs!' => \$shorten_pkgs,
'no-pkgs!' => \$no_pkgs,
'help' => \$help,
) or usage();
$help && usage();
my %collapsed;		# collapsed stack => number of occurrences

# Credit $count more occurrences to the collapsed stack $stack.
sub remember_stack {
	my $stack = shift;
	my $count = shift;
	$collapsed{$stack} += $count;
}
my @stack;		# frames of the current exception, outermost first

# Read the log one line at a time ("foreach (<>)" would slurp it).
while (<>) {
	chomp;
	if (/^\s*at ([^\(]*)/) {
		my $func = $1;
		# Only rewrite frames that really have a package part; anything
		# else (e.g. a default-package "Main.main") is kept verbatim
		# instead of becoming an empty/undefined frame.
		if (($shorten_pkgs || $no_pkgs) && $func =~ m/(.*\.)([^.]+\.[^.]+)$/) {
			my ($pkgs, $clsFunc) = ($1, $2);
			$pkgs =~ s/(\w)\w*/$1/g;
			$func = $no_pkgs ? $clsFunc : $pkgs . $clsFunc;
		}
		unshift @stack, $func;
	} elsif (@stack) {
		# "waiting on" lines do not end the stack in progress.
		next if m/.*waiting on .*/;
		remember_stack(join(";", @stack), 1);
		@stack = ();
	}
}

# The log may end in the middle of a stack.
remember_stack(join(";", @stack), 1) if @stack;

foreach my $k (sort { $a cmp $b } keys %collapsed) {
	print "$k $collapsed{$k}\n";
}

View File

@@ -0,0 +1,176 @@
#!/usr/bin/perl -w
#
# stackcollapse-jstack.pl collapse jstack samples into single lines.
#
# Parses Java stacks generated by jstack(1) and outputs RUNNABLE stacks as
# single lines, with methods separated by semicolons, and then a space and an
# occurrence count. This also filters some other "RUNNABLE" states that we
# know are probably not running, such as epollWait. For use with flamegraph.pl.
#
# You want this to process the output of at least 100 jstack(1)s. ie, run it
# 100 times with a sleep interval, and append to a file. This is really a poor
# man's Java profiler, due to the overheads of jstack(1), and how it isn't
# capturing stacks asynchronously. For a better profiler, see:
# http://www.brendangregg.com/blog/2014-06-12/java-flame-graphs.html
#
# USAGE: ./stackcollapse-jstack.pl infile > outfile
#
# Example input:
#
# "MyProg" #273 daemon prio=9 os_prio=0 tid=0x00007f273c038800 nid=0xe3c runnable [0x00007f28a30f2000]
# java.lang.Thread.State: RUNNABLE
# at java.net.SocketInputStream.socketRead0(Native Method)
# at java.net.SocketInputStream.read(SocketInputStream.java:121)
# ...
# at java.lang.Thread.run(Thread.java:744)
#
# Example output:
#
# MyProg;java.lang.Thread.run;java.net.SocketInputStream.read;java.net.SocketInputStream.socketRead0 1
#
# Input may be created and processed using:
#
# i=0; while (( i++ < 200 )); do jstack PID >> out.jstacks; sleep 10; done
# cat out.jstacks | ./stackcollapse-jstack.pl > out.stacks-folded
#
# WARNING: jstack(1) incurs overheads. Test before use, or use a real profiler.
#
# Copyright 2014 Brendan Gregg. All rights reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# (http://www.gnu.org/copyleft/gpl.html)
#
# 14-Sep-2014 Brendan Gregg Created this.
use strict;
use Getopt::Long;
# tunables
my $include_tname = 1; # include thread names in stacks
my $include_tid = 0; # include thread IDs in stacks
my $shorten_pkgs = 0; # shorten package names
my $help = 0; # set by --help
# Print the usage text and terminate (via die).
sub usage {
die <<USAGE_END;
USAGE: $0 [options] infile > outfile\n
--include-tname
--no-include-tname # include/omit thread names in stacks (default: include)
--include-tid
--no-include-tid # include/omit thread IDs in stacks (default: omit)
--shorten-pkgs
--no-shorten-pkgs # (don't) shorten package names (default: don't shorten)
eg,
$0 --no-include-tname stacks.txt > collapsed.txt
USAGE_END
}
# The "!" suffix gives each switch the --no-<name> form listed in the usage
# text.  Unknown options and --help both end in usage().
GetOptions(
'include-tname!' => \$include_tname,
'include-tid!' => \$include_tid,
'shorten-pkgs!' => \$shorten_pkgs,
'help' => \$help,
) or usage();
$help && usage();
# internals
my %collapsed;		# collapsed stack => number of occurrences

# Increase the tally of one collapsed stack.
# Arguments: the stack string, then the amount to add.
sub remember_stack {
	my @args = @_;
	$collapsed{$args[0]} += $args[1];
}
my @stack;		# frames of the current thread, outermost first
my $tname;		# thread name put in front of the stack (undef = none)
my $state = "?";	# effective state of the current thread

# Read the dump one line at a time ("foreach (<>)" would slurp it).
while (<>) {
	next if m/^#/;
	chomp;

	if (m/^$/) {
		# A blank line ends the current thread.
		# only include RUNNABLE states
		if ($state eq "RUNNABLE") {
			# save stack
			unshift @stack, $tname if defined $tname;
			remember_stack(join(";", @stack), 1) if @stack;
		}
		@stack = ();
		undef $tname;
		$state = "?";
		next;
	}

	#
	# While parsing jstack output, the $state variable may be altered from
	# RUNNABLE to other states. This causes the stacks to be filtered later,
	# since only RUNNABLE stacks are included.
	#

	if (/^"([^"]*)/) {
		my $name = $1;

		if ($include_tname) {
			$tname = $name;
			unless ($include_tid) {
				# drop a trailing "-<digits>" from the name
				$tname =~ s/-\d+$//;
			}
		}

		# set state for various background threads
		$state = "BACKGROUND" if $name =~ /C. CompilerThread/;
		$state = "BACKGROUND" if $name =~ /Signal Dispatcher/;
		$state = "BACKGROUND" if $name =~ /Service Thread/;
		$state = "BACKGROUND" if $name =~ /Attach Listener/;

	} elsif (/java.lang.Thread.State: (\S+)/) {
		# Do not overwrite a state already forced above.
		$state = $1 if $state eq "?";

	} elsif (/^\s*at ([^\(]*)/) {
		my $func = $1;
		# Only rewrite frames that really have a package part; anything
		# else (e.g. a default-package "Main.main") is kept verbatim
		# instead of becoming an empty frame.
		if ($shorten_pkgs && $func =~ m/(.*\.)([^.]+\.[^.]+)$/) {
			my ($pkgs, $clsFunc) = ($1, $2);
			$pkgs =~ s/(\w)\w*/$1/g;
			$func = $pkgs . $clsFunc;
		}
		unshift @stack, $func;

		# fix state for epollWait
		$state = "WAITING" if $func =~ /epollWait/;
		$state = "WAITING" if $func =~ /EPoll\.wait/;

		# fix state for various networking functions
		$state = "NETWORK" if $func =~ /socketAccept$/;
		$state = "NETWORK" if $func =~ /Socket.*accept0$/;
		$state = "NETWORK" if $func =~ /socketRead0$/;

	} elsif (/^\s*-/ or /^2\d\d\d-/ or /^Full thread dump/ or
		 /^JNI global references:/) {
		# skip these info lines
		next;
	} else {
		warn "Unrecognized line: $_";
	}
}

foreach my $k (sort { $a cmp $b } keys %collapsed) {
	print "$k $collapsed{$k}\n";
}

View File

@@ -0,0 +1,74 @@
#!/usr/bin/awk -f
#
# stackcollapse-ljp.awk collapse lightweight java profile reports
# into single lines stacks.
#
# Parses a list of multiline stacks generated by:
#
# https://code.google.com/p/lightweight-java-profiler
#
# and outputs a semicolon separated stack followed by a space and a count.
#
# USAGE: ./stackcollapse-ljp.awk infile > outfile
#
# Example input:
#
# 42 3 my_func_b(prog.java:455)
# my_func_a(prog.java:123)
# java.lang.Thread.run(Thread.java:744)
# [...]
#
# Example output:
#
# java.lang.Thread.run;my_func_a;my_func_b 42
#
# The unused number is the number of frames in each stack.
#
# Copyright 2014 Brendan Gregg. All rights reserved.
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at docs/cddl1.txt or
# http://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at docs/cddl1.txt.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
# 12-Jun-2014 Brendan Gregg Created this.
$1 == "Total" {
	# We're done. The END rule prints the last buffered stack.
	exit
}

{
	# Strip file location. Comment this out to keep.
	gsub(/\(.*\)/, "")
}

NF == 3 {
	# New stack begins. Print previous buffered stack.
	if (count)
		print stack, count

	# Begin a new stack.
	count = $1
	stack = $3
}

NF == 1 {
	# Build stack.
	stack = $1 ";" stack
}

END {
	# Runs after "exit" above and also at end of input, so the last stack is
	# printed even if there is no "Total" line, and nothing is printed when
	# no stack was read at all.
	if (count)
		print stack, count
}

View File

@@ -0,0 +1,228 @@
#!/usr/bin/awk -f
#
# This program generates collapsed off-cpu stacks fit for use by flamegraph.pl
# from scheduler data collected via perf_events.
#
# Outputs the cumulative time off cpu in us for each distinct stack observed.
#
# Some awk variables further control behavior:
#
# record_tid If truthy, causes all stack traces to include the
# command and LWP id.
#
# record_wake_stack If truthy, stacks include the frames from the wakeup
# event in addition to the sleep event.
# See http://www.brendangregg.com/FlameGraphs/offcpuflamegraphs.html#Wakeup
#
# recurse If truthy, attempt to recursively identify and
# visualize the full wakeup stack chain.
# See http://www.brendangregg.com/FlameGraphs/offcpuflamegraphs.html#ChainGraph
#
# Note that this is only an approximation, as only the
# last sleep event is recorded (e.g. if a thread slept
# multiple times before waking another thread, only the
# last sleep event is used). Implies record_wake_stack=1
#
# To set any of these variables from the command line, run via:
#
# stackcollapse-perf-sched.awk -v recurse=1
#
# == Important warning ==
#
# WARNING: tracing all scheduler events is very high overhead in perf. Even
# more alarmingly, there appear to be bugs in perf that prevent it from reliably
# getting consistent traces (even with large trace buffers), causing it to
# produce empty perf.data files with error messages of the form:
#
# 0x952790 [0x736d]: failed to process type: 3410
#
# This failure is not deterministic, so re-executing perf record will
# eventually succeed.
#
# == Usage ==
#
# First, record data via perf_events:
#
# sudo perf record -g -e 'sched:sched_switch' \
# -e 'sched:sched_stat_sleep' -e 'sched:sched_stat_blocked' \
# -p <pid> -o perf.data -- sleep 1
#
# Then post process with this script:
#
# sudo perf script -f time,comm,pid,tid,event,ip,sym,dso,trace -i perf.data | \
# stackcollapse-perf-sched.awk -v recurse=1 | \
# flamegraph.pl --color=io --countname=us >out.svg
#
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at docs/cddl1.txt or
# http://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at docs/cddl1.txt.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2015 by MemSQL. All rights reserved.
#
#
# Match a perf captured variable, returning just the contents. For example, for
# the following line, get_perf_captured_variable("pid") would return "27235":
#
# swapper 0 [006] 708189.626415: sched:sched_stat_sleep: comm=memsqld pid=27235 delay=100078421 [ns
#
#
# Return the text following "<variable>=" on the current record, up to (but
# not including) the next whitespace character. Returns "" when the record
# contains no such token.
#
function get_perf_captured_variable(variable)
{
	if (!match($0, variable "=[^[:space:]]+")) {
		return ""
	}
	# The match covers "<variable>=<value>"; step over the name and the "=".
	return substr($0, RSTART + length(variable) + 1,
	    RLENGTH - length(variable) - 1)
}
#
# The timestamp is the first field that ends in a colon, e.g.:
#
# swapper 0 [006] 708189.626415: sched:sched_stat_sleep: comm=memsqld pid=27235 delay=100078421 [ns
#
# or
#
# swapper 0/0 708189.626415: sched:sched_stat_sleep: comm=memsqld pid=27235 delay=100078421 [ns]
#
#
# Return the first space-preceded field of the current record that ends in a
# colon, without the leading space or the trailing colon. Returns "" when no
# such field exists.
#
function get_perf_timestamp()
{
	if (match($0, " [^ :]+:") == 0) {
		return ""
	}
	# Trim one character from each end of the match (the space and the colon).
	return substr($0, RSTART + 1, RLENGTH - 2)
}
#
# A sched_switch event header: remember which task is being switched out and
# when, and start collecting its stack from scratch.
#
/sched:sched_switch/ && !/^#/ {
	switchstack = ""
	switchtime = get_perf_timestamp()
	switchpid = get_perf_captured_variable("prev_pid")
	switchcommand = get_perf_captured_variable("comm")
}
#
# Strip the function name from a stack entry
#
# Stack entry is expected to be formatted like:
# c60849 MyClass::Foo(unsigned long) (/home/areece/a.out)
#
#
# Return fields 2 .. NF-1 of the current record joined by single spaces, i.e.
# the stack entry without its leading hex offset and trailing library path.
#
function get_function_name()
{
	# Field 1 is the hex offset, so the name starts at field 2.
	funcname = $2
	# Append every remaining field except the last one (the library path).
	i = 3
	while (i < NF) {
		funcname = funcname " " $i
		i++
	}
	return funcname
}
#
# While a sched_switch record is open, every line starting with whitespace is
# one frame of the switched-out task's stack. Each new frame is prepended, so
# switchstack ends up in the reverse of the order the lines arrive in.
#
# [[:space:]] is used rather than "\s": "\s" is a gawk extension and other awk
# implementations treat it as a literal "s".
#
(switchpid != 0 && /^[[:space:]]/) {
	if (switchstack == "") {
		switchstack = get_function_name()
	} else {
		switchstack = get_function_name() ";" switchstack
	}
}
#
# An empty line closes the open sched_switch record: file the collected stack
# and timestamp under the switched-out pid, drop any recursed stack previously
# kept for that pid, and clear the per-record state.
#
(switchpid != 0 && $0 == "") {
	delete last_switch_stacks[switchpid]
	switch_time[switchpid] = switchtime
	switch_stacks[switchpid] = switchstack

	switchstack = ""
	switchcommand = ""
	switchpid = 0
}
#
# A sched_stat_sleep / sched_stat_blocked event header. The first two fields
# of the record identify the task that did the waking; the captured comm, pid
# and delay describe the task that was asleep and for how long (nanoseconds).
#
!/^#/ && /sched:sched_stat_(sleep|blocked)/ {
	wakestack = ""
	waketime = get_perf_timestamp()
	wakepid = $2
	wakecommand = $1

	stat_delay_ns = int(get_perf_captured_variable("delay"))
	stat_next_pid = get_perf_captured_variable("pid")
	stat_next_command = get_perf_captured_variable("comm")
}
#
# While a sched_stat record is open, every line starting with whitespace is
# one frame of the waker's stack. Frames are appended in arrival order.
#
# [[:space:]] is used rather than "\s": "\s" is a gawk extension and other awk
# implementations treat it as a literal "s".
#
(stat_next_pid != 0 && /^[[:space:]]/) {
	if (wakestack == "") {
		wakestack = get_function_name()
	} else {
		# We build the wakestack in reverse order.
		wakestack = wakestack ";" get_function_name()
	}
}
#
# An empty line closes the open sched_stat record. If a switch-out stack is
# pending for the woken pid, build the folded stack (optionally extended with
# the waker's stack and name) and charge the reported delay to it in
# stack_times. Per-record state is then cleared.
#
# Controlled by the variables recurse, record_wake_stack and record_tid.
#
(stat_next_pid != 0 && /^$/) {
	#
	# For some reason, perf appears to output duplicate
	# sched:sched_stat_sleep and sched:sched_stat_blocked events. We only
	# handle the first event.
	#
	if (stat_next_pid in switch_stacks) {
		last_wake_time[stat_next_pid] = waketime

		stack = switch_stacks[stat_next_pid]
		if (recurse || record_wake_stack) {
			stack = stack ";" wakestack
			if (record_tid) {
				stack = stack ";" wakecommand "-" wakepid
			} else {
				stack = stack ";" wakecommand
			}
		}

		if (recurse) {
			# NOTE(review): last_switch_time is never assigned anywhere
			# in the visible script (the switch-out rule fills
			# switch_time instead), so the right-hand side here is
			# always empty -- confirm which array was intended.
			if (last_wake_time[wakepid] > last_switch_time[stat_next_pid]) {
				stack = stack ";-;" last_switch_stacks[wakepid]
			}
			last_switch_stacks[stat_next_pid] = stack
		}

		# Consuming the entry is what makes a duplicate event a no-op.
		delete switch_stacks[stat_next_pid]

		if (record_tid) {
			stack_times[stat_next_command "-" stat_next_pid ";" stack] += stat_delay_ns
		} else {
			stack_times[stat_next_command ";" stack] += stat_delay_ns
		}
	}

	wakecommand=""
	wakepid=0
	stat_next_pid=0
	stat_next_command=""
	# Reset the variable the stat rule actually sets (was stat_delay_ms).
	stat_delay_ns=0
}
#
# Emit one "stack microseconds" line per folded stack, converting the
# accumulated nanoseconds to whole microseconds and skipping stacks that
# round down to zero.
#
END {
	for (stack in stack_times) {
		micros = int(stack_times[stack] / 1000)
		if (micros > 0) {
			print stack, micros
		}
	}
}

View File

@@ -0,0 +1,435 @@
#!/usr/bin/perl -w
#
# stackcollapse-perf.pl collapse perf samples into single lines.
#
# Parses a list of multiline stacks generated by "perf script", and
# outputs a semicolon separated stack followed by a space and a count.
# If memory addresses (+0xd) are present, they are stripped, and resulting
# identical stacks are coalesced with their counts summed.
#
# USAGE: ./stackcollapse-perf.pl [options] infile > outfile
#
# Run "./stackcollapse-perf.pl -h" to list options.
#
# Example input:
#
# swapper 0 [000] 158665.570607: cpu-clock:
# ffffffff8103ce3b native_safe_halt ([kernel.kallsyms])
# ffffffff8101c6a3 default_idle ([kernel.kallsyms])
# ffffffff81013236 cpu_idle ([kernel.kallsyms])
# ffffffff815bf03e rest_init ([kernel.kallsyms])
# ffffffff81aebbfe start_kernel ([kernel.kallsyms].init.text)
# [...]
#
# Example output:
#
# swapper;start_kernel;rest_init;cpu_idle;default_idle;native_safe_halt 1
#
# Input may be created and processed using:
#
# perf record -a -g -F 997 sleep 60
# perf script | ./stackcollapse-perf.pl > out.stacks-folded
#
# The output of "perf script" should include stack traces. If these are missing
# for you, try manually selecting the perf script output; eg:
#
# perf script -f comm,pid,tid,cpu,time,event,ip,sym,dso,trace | ...
#
# This is also required for the --pid or --tid options, so that the output has
# both the PID and TID.
#
# Copyright 2012 Joyent, Inc. All rights reserved.
# Copyright 2012 Brendan Gregg. All rights reserved.
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at docs/cddl1.txt or
# http://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at docs/cddl1.txt.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
# 02-Mar-2012 Brendan Gregg Created this.
# 02-Jul-2014 " " Added process name to stacks.
use strict;
use Getopt::Long;
# Folded stack string => accumulated count for that stack.
my %collapsed;

# Add $count to the running total kept for the folded stack $stack.
sub remember_stack {
	my $stack = shift;
	my $count = shift;
	$collapsed{$stack} += $count;
}
# Option defaults and parser state. The flags wired into GetOptions below can
# be changed from the command line; the rest are fixed defaults or state that
# the main loop updates while reading input.
my $annotate_kernel = 0; # put an annotation on kernel function
my $annotate_jit = 0; # put an annotation on jit symbols
my $annotate_all = 0; # enable all annotations
my $include_pname = 1; # include process names in stacks
my $include_pid = 0; # include process ID with process name
my $include_tid = 0; # include process & thread ID with process name
my $include_addrs = 0; # include raw address where a symbol can't be found
my $tidy_java = 1; # condense Java signatures
my $tidy_generic = 1; # clean up function names a little
my $target_pname; # target process name from perf invocation
my $event_filter = ""; # event type filter, defaults to first encountered event
my $event_defaulted = 0; # whether we defaulted to an event (none provided)
my $event_warning = 0; # if we printed a warning for the event
my $show_inline = 0;
my $show_context = 0;
my $srcline_in_input = 0; # if there are extra lines with source location (perf script -F+srcline)
# Parse the command line; if GetOptions reports a failure, die with the usage
# text below.
GetOptions('inline' => \$show_inline,
'context' => \$show_context,
'srcline' => \$srcline_in_input,
'pid' => \$include_pid,
'kernel' => \$annotate_kernel,
'jit' => \$annotate_jit,
'all' => \$annotate_all,
'tid' => \$include_tid,
'addrs' => \$include_addrs,
'event-filter=s' => \$event_filter)
or die <<USAGE_END;
USAGE: $0 [options] infile > outfile\n
--pid # include PID with process names [1]
--tid # include TID and PID with process names [1]
--inline # un-inline using addr2line
--all # all annotations (--kernel --jit)
--kernel # annotate kernel functions with a _[k]
--jit # annotate jit functions with a _[j]
--context # adds source context to --inline
--srcline # parses output of 'perf script -F+srcline' and adds source context
--addrs # include raw addresses where symbols can't be found
--event-filter=EVENT # event name filter\n
[1] perf script must emit both PID and TIDs for these to work; eg, Linux < 4.1:
perf script -f comm,pid,tid,cpu,time,event,ip,sym,dso,trace
for Linux >= 4.1:
perf script -F comm,pid,tid,cpu,time,event,ip,sym,dso,trace
If you save this output add --header on Linux >= 3.14 to include perf info.
USAGE_END
# --all is shorthand for turning on both the kernel and the jit annotations.
if ($annotate_all) {
$annotate_kernel = $annotate_jit = 1;
}
# $inlineCache{$pc}{$mod} => resolved inline chain for that address/module.
my %inlineCache;
# $nmCache{$mod} => raw symbol listing for that module.
my %nmCache;

# Store $result in the inline cache under ($pc, $mod), creating the per-$pc
# table the first time that address is seen.
sub inlineCacheAdd {
	my ($pc, $mod, $result) = @_;
	return $inlineCache{$pc}{$mod} = $result if defined($inlineCache{$pc});
	return $inlineCache{$pc} = {$mod => $result};
}
# for the --inline option
# Run @cmd directly, without going through a shell, and return everything it
# wrote to stdout. Returns "" when the command cannot be started or prints
# nothing. Bypassing the shell keeps module paths containing spaces or shell
# metacharacters from being split or interpreted.
sub _capture_command {
	my @cmd = @_;
	open(my $pipe, '-|', @cmd) or return "";
	my $output = do { local $/; <$pipe> };
	close($pipe);
	return defined($output) ? $output : "";
}

# for the --inline option
#
# Resolve address $pc inside module $mod into a ";"-joined chain of function
# names (outermost first) using addr2line. With --context each name is
# followed by ":" and the location line addr2line printed after it.
#
#   $pc      - address to resolve
#   $rawfunc - symbol text from perf; only used as a fallback when it has the
#              form "func+0xoffset"
#   $mod     - path of the module to pass to addr2line / nm
#
# Results are cached per ($pc, $mod). Returns the chain, which may be empty.
sub inline {
	my ($pc, $rawfunc, $mod) = @_;

	return $inlineCache{$pc}{$mod} if defined($inlineCache{$pc}{$mod});

	# capture addr2line output
	my $a2l_output = _capture_command('addr2line', '-a', $pc, '-e', $mod,
	    '-i', '-f', '-s', '-C');

	# remove first line
	$a2l_output =~ s/^(.*\n){1}//;

	if ($a2l_output =~ /\?\?\n\?\?:0/) {
		# if addr2line fails and rawfunc is func+offset, then fall back to it
		if ($rawfunc =~ /^(.+)\+0x([0-9a-f]+)$/) {
			my $func = $1;
			my $addr = hex $2;

			$nmCache{$mod} = _capture_command('nm', $mod)
			    unless defined $nmCache{$mod};

			if ($nmCache{$mod} =~ /^([0-9a-f]+) . \Q$func\E$/m) {
				my $base = hex $1;
				my $newPc = sprintf "0x%x", $base+$addr;
				# Retry with the absolute address; the empty
				# $rawfunc prevents a second fallback.
				my $result = inline($newPc, '', $mod);
				inlineCacheAdd($pc, $mod, $result);
				return $result;
			}
		}
	}

	# The remaining output comes in pairs of lines: a function name followed
	# by its location.
	my @fullfunc;
	my $one_item = "";
	for (split /^/, $a2l_output) {
		chomp $_;

		# remove discriminator info if exists
		$_ =~ s/ \(discriminator \S+\)//;

		if ($one_item eq "") {
			$one_item = $_;
		} else {
			if ($show_context == 1) {
				unshift @fullfunc, $one_item . ":$_";
			} else {
				unshift @fullfunc, $one_item;
			}
			$one_item = "";
		}
	}

	my $result = join ";" , @fullfunc;

	inlineCacheAdd($pc, $mod, $result);

	return $result;
}
my @stack;
my $pname;
my $m_pid;
my $m_tid;
my $m_period;

#
# Main loop
#
# Reads "perf script" output line by line. An event header line sets $pname
# and the sample period, each following stack line adds frames to @stack
# (root-most first), and an empty line folds the collected stack into
# %collapsed via remember_stack(). $pname stays unset for events that are
# filtered out, which makes their stack lines and terminator be skipped.
#
while (defined($_ = <>)) {

	# find the name of the process launched by perf, by stepping backwards
	# over the args to find the first non-option (no dash):
	if (/^# cmdline/) {
		my @args = split ' ', $_;
		foreach my $arg (reverse @args) {
			if ($arg !~ /^-/) {
				$target_pname = $arg;
				$target_pname =~ s:.*/::;	# strip pathname
				last;
			}
		}
	}

	# skip remaining comments
	next if m/^#/;
	chomp;

	# end of stack. save cached data.
	if (m/^$/) {
		# ignore filtered samples
		next if not $pname;

		# $pname is known to be set at this point.
		unshift @stack, $pname if $include_pname;
		remember_stack(join(";", @stack), $m_period) if @stack;
		undef @stack;
		undef $pname;
		next;
	}

	#
	# event record start
	#
	if (/^(\S.+?)\s+(\d+)\/*(\d+)*\s+/) {
		# default "perf script" output has TID but not PID
		# eg, "java 25607 4794564.109216: 1 cycles:"
		# eg, "java 12688 [002] 6544038.708352: 235 cpu-clock:"
		# eg, "V8 WorkerThread 25607 4794564.109216: 104345 cycles:"
		# eg, "java 24636/25607 [000] 4794564.109216: 1 cycles:"
		# eg, "java 12688/12764 6544038.708352: 10309278 cpu-clock:"
		# eg, "V8 WorkerThread 24636/25607 [000] 94564.109216: 100 cycles:"
		# other combinations possible
		my ($comm, $pid, $tid, $period) = ($1, $2, $3, "");
		if (not $tid) {
			$tid = $pid;
			$pid = "?";
		}

		if (/:\s*(\d+)*\s+(\S+):\s*$/) {
			$period = $1;
			my $event = $2;

			if ($event_filter eq "") {
				# By default only show events of the first encountered
				# event type. Merging together different types, such as
				# instructions and cycles, produces misleading results.
				$event_filter = $event;
				$event_defaulted = 1;
			} elsif ($event ne $event_filter) {
				if ($event_defaulted and $event_warning == 0) {
					# only print this warning if necessary:
					# when we defaulted and there was
					# multiple event types.
					# Report the event type being kept, not
					# the one being discarded.
					print STDERR "Filtering for events of type: $event_filter\n";
					$event_warning = 1;
				}
				next;
			}
		}

		if (not $period) {
			$period = 1
		}
		($m_pid, $m_tid, $m_period) = ($pid, $tid, $period);

		if ($include_tid) {
			$pname = "$comm-$m_pid/$m_tid";
		} elsif ($include_pid) {
			$pname = "$comm-$m_pid";
		} else {
			$pname = "$comm";
		}
		$pname =~ tr/ /_/;

	#
	# stack line
	#
	} elsif (/^\s*(\w+)\s*(.+) \((.*)\)/) {
		# ignore filtered samples
		next if not $pname;

		my ($pc, $rawfunc, $mod) = ($1, $2, $3);

		if ($show_inline == 1 && $mod !~ m/(perf-\d+.map|kernel\.|\[[^\]]+\])/) {
			my $inlineRes = inline($pc, $rawfunc, $mod);
			# - empty result this happens e.g., when $mod does not exist or is a path to a compressed kernel module
			#   if this happens, the user will see error message from addr2line written to stderr
			# - if addr2line results in "??" , then it's much more sane to fall back than produce a '??' in graph
			if($inlineRes ne "" and $inlineRes ne "??" and $inlineRes ne "??:??:0" ) {
				unshift @stack, $inlineRes;
				next;
			}
		}

		# Linux 4.8 included symbol offsets in perf script output by default, eg:
		# 7fffb84c9afc cpu_startup_entry+0x800047c022ec ([kernel.kallsyms])
		# strip these off:
		$rawfunc =~ s/\+0x[\da-f]+$//;

		next if $rawfunc =~ /^\(/;		# skip process names

		my $is_unknown=0;
		my @inline;
		for (split /\->/, $rawfunc) {
			my $func = $_;

			if ($func eq "[unknown]") {
				if ($mod ne "[unknown]") { # use module name instead, if known
					$func = $mod;
					$func =~ s/.*\///;
				} else {
					$func = "unknown";
					$is_unknown=1;
				}

				if ($include_addrs) {
					$func = "\[$func \<$pc\>\]";
				} else {
					$func = "\[$func\]";
				}
			}

			if ($tidy_generic) {
				$func =~ s/;/:/g;
				if ($func !~ m/\.\(.*\)\./) {
					# This doesn't look like a Go method name (such as
					# "net/http.(*Client).Do"), so everything after the first open
					# paren (that is not part of an "(anonymous namespace)") is
					# just noise.
					$func =~ s/\((?!anonymous namespace\)).*//;
				}
				# now tidy this horrible thing:
				# 13a80b608e0a RegExp:[&<>\"\'] (/tmp/perf-7539.map)
				$func =~ tr/"\'//d;
				# fall through to $tidy_java
			}

			if ($tidy_java and $pname =~ m/^java/) {
				# along with $tidy_generic, converts the following:
				#	Lorg/mozilla/javascript/ContextFactory;.call(Lorg/mozilla/javascript/ContextAction;)Ljava/lang/Object;
				#	Lorg/mozilla/javascript/ContextFactory;.call(Lorg/mozilla/javascript/C
				#	Lorg/mozilla/javascript/MemberBox;.<init>(Ljava/lang/reflect/Method;)V
				# into:
				#	org/mozilla/javascript/ContextFactory:.call
				#	org/mozilla/javascript/ContextFactory:.call
				#	org/mozilla/javascript/MemberBox:.init
				$func =~ s/^L// if $func =~ m:/:;
			}

			#
			# Annotations
			#
			# detect inlined from the @inline array
			# detect kernel from the module name; eg, frames to parse include:
			#   ffffffff8103ce3b native_safe_halt ([kernel.kallsyms])
			#   8c3453 tcp_sendmsg (/lib/modules/4.3.0-rc1-virtual/build/vmlinux)
			#   7d8 ipv4_conntrack_local+0x7f8f80b8 ([nf_conntrack_ipv4])
			# detect jit from the module name; eg:
			#   7f722d142778 Ljava/io/PrintStream;::print (/tmp/perf-19982.map)
			if (scalar(@inline) > 0) {
				$func .= "_[i]" unless $func =~ m/\_\[i\]/;	# inlined
			} elsif ($annotate_kernel == 1 && $mod =~ m/(^\[|vmlinux$)/ && $mod !~ /unknown/) {
				$func .= "_[k]";	# kernel
			} elsif ($annotate_jit == 1 && $mod =~ m:/tmp/perf-\d+\.map:) {
				$func .= "_[j]" unless $func =~ m/\_\[j\]/;	# jitted
			}

			#
			# Source lines
			#
			#
			# Sample outputs:
			# | a.out 35081 252436.005167: 667783 cycles:
			# | 408ebb some_method_name+0x8b (/full/path/to/a.out)
			# | uniform_int_dist.h:300
			# | 4069f5 main+0x935 (/full/path/to/a.out)
			# | file.cpp:137
			# | 7f6d2148eb25 __libc_start_main+0xd5 (/lib64/libc-2.33.so)
			# | libc-2.33.so[27b25]
			#
			# | a.out 35081 252435.738165: 306459 cycles:
			# | 7f6d213c2750 [unknown] (/usr/lib64/libkmod.so.2.3.6)
			# | libkmod.so.2.3.6[6750]
			#
			# | a.out 35081 252435.738373: 315813 cycles:
			# | 7f6d215ca51b __strlen_avx2+0x4b (/lib64/libc-2.33.so)
			# | libc-2.33.so[16351b]
			# | 7ffc71ee9580 [unknown] ([unknown])
			# |
			#
			# | a.out 35081 252435.718940: 247984 cycles:
			# | ffffffff814f9302 up_write+0x32 ([kernel.kallsyms])
			# | [kernel.kallsyms][ffffffff814f9302]
			if($srcline_in_input and not $is_unknown){
				# The source location sits on the line after the frame.
				$_ = <>;
				# Input may end right after a frame; treat that as
				# "no source line" instead of operating on undef.
				$_ = "" unless defined $_;
				chomp;
				s/\[.*?\]//g;
				s/^\s*//g;
				s/\s*$//g;
				$func.=':'.$_ unless $_ eq "";
			}

			push @inline, $func;
		}

		unshift @stack, @inline;
	} else {
		warn "Unrecognized line: $_";
	}
}
# Emit every folded stack with its total, ordered by stack string.
for my $folded (sort keys %collapsed) {
	print $folded, " ", $collapsed{$folded}, "\n";
}

View File

@@ -0,0 +1,74 @@
#!/usr/bin/env perl
#
# Copyright (c) 2014 Ed Maste. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
# stackcollapse-pmc.pl collapse hwpmc samples into single lines.
#
# Parses a list of multiline stacks generated by "hwpmc -G", and outputs a
# semicolon-separated stack followed by a space and a count.
#
# Usage:
# pmcstat -S unhalted-cycles -O pmc.out
# pmcstat -R pmc.out -z16 -G pmc.graph
# stackcollapse-pmc.pl pmc.graph > pmc.stack
#
# Example input:
#
# 03.07% [17] witness_unlock @ /boot/kernel/kernel
# 70.59% [12] __mtx_unlock_flags
# 16.67% [2] selfdfree
# 100.0% [2] sys_poll
# 100.0% [2] amd64_syscall
# 08.33% [1] pmap_ts_referenced
# 100.0% [1] vm_pageout
# 100.0% [1] fork_exit
# ...
#
# Example output:
#
# amd64_syscall;sys_poll;selfdfree;__mtx_unlock_flags;witness_unlock 2
# fork_exit;vm_pageout;pmap_ts_referenced;__mtx_unlock_flags;witness_unlock 1
# ...
use warnings;
use strict;
# $stack[$depth] holds the function name most recently seen at indentation
# $depth; $prev_count / $prev_indent describe the last sample line read.
my @stack;
my $prev_count;
my $prev_indent = -1;

while (defined($_ = <>)) {
	# Sample line: leading spaces (depth), a percentage, "[count]", a name.
	if (m/^( *)[0-9.]+% \[([0-9]+)\]\s*(\S+)/) {
		my $indent = length($1);
		# Not going deeper means the previous line ended a stack, so
		# emit it (reversed) with that line's count.
		if ($indent <= $prev_indent) {
			print join(';', reverse(@stack[0 .. $prev_indent])) .
			    " $prev_count\n";
		}
		$stack[$indent] = $3;
		$prev_count = $2;
		$prev_indent = $indent;
	}
}

# Flush the final stack. When no sample line was seen there is nothing to
# flush; printing anyway would emit a bare " " line and an
# uninitialized-value warning.
if (defined $prev_count) {
	print join(';', reverse(@stack[0 .. $prev_indent])) . " $prev_count\n";
}

View File

@@ -0,0 +1,60 @@
#!/usr/bin/perl -ws
#
# stackcollapse-recursive Collapse direct recursive backtraces
#
# Post-process a stack list and merge direct recursive calls:
#
# Example input:
#
# main;recursive;recursive;recursive;helper 1
#
# Output:
#
# main;recursive;helper 1
#
# Copyright 2014 Gabriel Corona. All rights reserved.
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at docs/cddl1.txt or
# http://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at docs/cddl1.txt.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
# Folded stack (after merging adjacent duplicates) => summed value.
my %stacks;

while (my $line = <>) {
	chomp($line);

	# Split "frame;frame;... value" into the stack part and the number.
	my ($stack_, $value) = $line =~ /^(.*)\s+?(\d+(?:\.\d*)?)$/;
	next unless $stack_;

	# Keep a frame only when it differs from the one just before it.
	my @merged;
	my $previous = "";
	foreach my $frame (split(/;/, $stack_)) {
		next if $frame eq $previous;
		push @merged, $frame;
		$previous = $frame;
	}

	$stacks{join(";", @merged)} += $value;
}

for my $folded (sort keys %stacks) {
	print $folded, " ", $stacks{$folded}, "\n";
}

View File

@@ -0,0 +1,231 @@
#!/usr/bin/awk -f
#
# Uses MacOS' /usr/bin/sample to generate a flamegraph of a process
#
# Usage:
#
# sudo sample [pid] -file /dev/stdout | stackcollapse-sample.awk | flamegraph.pl
#
# Options:
#
# The output will show the name of the library/framework at the call-site
# with the form AppKit`NSApplication or libsystem`start_wqthread.
#
# If showing the framework or library name is not required, pass
# MODULES=0 as an argument of the sample program.
#
# The collapsed stacks will be written to the output stream, and can be piped
# into flamegraph.pl directly, or written to a file for conversion later.
#
# ---
#
# Copyright (c) 2017, Apple Inc.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
BEGIN {
	# Command line options
	MODULES = 1 # Allows the user to enable/disable printing of modules.

	# Internal variables
	_FOUND_STACK = 0 # Found the stack traces in the output.
	_LEVEL = -1 # The current level of indentation we are running.

	# The set of symbols to ignore for 'waiting' threads, for ease of use.
	# This will hide waiting threads from the view, making it easier to
	# see what is actually running in the sample. These may be adjusted
	# as necessary or appended to if other symbols need to be filtered out.
	_NSYMS = split("__psynch_cvwait __select __semwait_signal __ulock_wait " \
	    "__wait4 __workq_kernreturn kevent mach_msg_trap read " \
	    "semaphore_wait_trap", _SYMS, " ")

	# Each symbol is registered twice: qualified with its module name, and
	# bare, so it is recognised whether or not module names are printed.
	for (_S = 1; _S <= _NSYMS; _S++) {
		_IGNORE["libsystem_kernel`" _SYMS[_S]] = 1
		_IGNORE[_SYMS[_S]] = 1
	}
}
# This is the first line in the /usr/bin/sample output that indicates the
# samples follow subsequently. Until we see this line, the rest is ignored.
$0 ~ /^Call graph/ {
	# From here on, indented lines are treated as samples.
	_FOUND_STACK = 1
}
# This is found when we have reached the end of the stack output.
# Identified by the string "Total number in stack (...)".
$0 ~ /^Total number/ {
	# Flush whatever is still recorded, from the last nesting level seen
	# down to the root, then stop treating lines as samples.
	printStack(_NEST, 0)
	_FOUND_STACK = 0
}
# Prints the stack from FROM to TO (where FROM > TO)
# Called when indenting back from a previous level, or at the end
# of processing to flush the last recorded sample
function printStack(FROM,TO) {
	# Blocking wait states cannot be filtered out at collection time, and
	# left in they would show up as many equally long threads spanning the
	# whole sampling period. The levels are still walked and cleared below
	# so the bookkeeping stays consistent; only the printing is suppressed
	# when the entry at level FROM is in the ignore list.
	_PRINT_IT = !_IGNORE[_NAMES[FROM]]

	# Walk from the deepest level back up to TO, emitting one line per
	# level in the form flamegraph.pl expects, root first:
	# Thread1234;example`main;example`otherFn 1234
	l = FROM
	while (l >= TO) {
		if (_PRINT_IT) {
			_LINE = _NAMES[0]
			for (i = 1; i <= l; i++) {
				_LINE = _LINE ";" _NAMES[i]
			}
			print _LINE " " _TIMES[l]
		}
		# Forget this level so stale entries cannot leak into later stacks.
		delete _NAMES[l]
		delete _TIMES[l]
		l--
	}
}
# This is where we process each line, of the form:
# 5130 Thread_8749954
# + 5130 start_wqthread (in libsystem_pthread.dylib) ...
# + 4282 _pthread_wqthread (in libsystem_pthread.dylib) ...
# + ! 4282 __doworkq_kernreturn (in libsystem_kernel.dylib) ...
# + 848 _pthread_wqthread (in libsystem_pthread.dylib) ...
# + 848 __doworkq_kernreturn (in libsystem_kernel.dylib) ...
# Handles one sample line once the call graph section has started: derives the
# nesting level, time and function name, flushes finished stacks when the
# nesting does not increase, and records the new entry.
_FOUND_STACK && match($0,/^ [^0-9]*[0-9]/) {
# We maintain two counters:
# _LEVEL: the nesting level of the previously processed sample line.
# _NEST: the current indentation level.
#
# We keep track of these two levels such that when the nesting level
# decreases, we print out the current state of where we are.
# NOTE(review): the -5 and /2 presume a fixed amount of leading text before
# the first digit and two columns per nesting level -- confirm against the
# exact whitespace in the pattern above and in sample's output.
_NEST=(RLENGTH-5)/2
sub(/^[^0-9]*/,"") # Normalise the leading content so we start with time.
_TIME=$1 # The time recorded by 'sample', first integer value.
# The function name is in one or two parts, depending on what kind of
# function it is.
#
# If it is a standard C or C++ function, it will be of the form:
# exampleFunction
# Example::Function
#
# If it is an Objective-C function, it will be of the form:
# -[NSExample function]
# +[NSExample staticFunction]
# -[NSExample function:withParameter]
# +[NSExample staticFunction:withParameter:andAnother]
_FN1 = $2
_FN2 = $3
# If it is a standard C or C++ function then the following word will
# either be blank, or the text '(in', so we just use the first one:
if (_FN2 == "(in" || _FN2 == "") {
_FN =_FN1
} else {
# Otherwise we concatenate the first two parts with .
_FN = _FN1 "." _FN2
}
# Modules are shown with '(in libfoo.dylib)' or '(in AppKit)'
_MODULE = ""
match($0, /\(in [^)]*\)/)
if (RSTART > 0 && MODULES) {
# Strip off the '(in ' (4 chars) and the final ')' char (1 char)
_MODULE = substr($0, RSTART+4, RLENGTH-5)
# Remove the .dylib suffix, since it adds no value.
gsub(/\.dylib/, "", _MODULE)
# The function name is 'module`functionName'
_FN = _MODULE "`" _FN
}
# Now we have set up the variables, we can decide how to apply it
# If we are descending in the nesting, we don't print anything out:
# a
# ab
# abc
#
# We only print out something when we go back a level, or hit the end:
# abcd
# abe < prints out the stack up until this point, i.e. abcd
# We store a pair of arrays, indexed by the nesting level:
#
# _TIMES - a list of the time reported to that function
# _NAMES - a list of the function names for each current stack trace
# If we are backtracking, we need to flush the current output.
if (_NEST <= _LEVEL) {
printStack(_LEVEL,_NEST)
}
# Record the name and time of the function where we are.
_NAMES[_NEST] = _FN
_TIMES[_NEST] = _TIME
# We subtract the time we took from our parent so we don't double count.
if (_NEST > 0) {
_TIMES[_NEST-1] -= _TIME
}
# Remember this line's level for comparison with the next line.
_LEVEL = _NEST
}

View File

@@ -0,0 +1,84 @@
#!/usr/bin/perl -w
#
# stackcollapse-stap.pl collapse multiline SystemTap stacks
# into single lines.
#
# Parses a multiline stack followed by a number on a separate line, and
# outputs a semicolon separated stack followed by a space and the number.
# If memory addresses (+0xd) are present, they are stripped, and resulting
# identical stacks are coalesced with their counts summed.
#
# USAGE: ./stackcollapse-stap.pl infile > outfile
#
# Example input:
#
# 0xffffffff8103ce3b : native_safe_halt+0xb/0x10 [kernel]
# 0xffffffff8101c6a3 : default_idle+0x53/0x1d0 [kernel]
# 0xffffffff81013236 : cpu_idle+0xd6/0x120 [kernel]
# 0xffffffff815bf03e : rest_init+0x72/0x74 [kernel]
# 0xffffffff81aebbfe : start_kernel+0x3ba/0x3c5 [kernel]
# 2404
#
# Example output:
#
# start_kernel;rest_init;cpu_idle;default_idle;native_safe_halt 2404
#
# Input may contain many stacks as generated from SystemTap.
#
# Copyright 2011 Joyent, Inc. All rights reserved.
# Copyright 2011 Brendan Gregg. All rights reserved.
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at docs/cddl1.txt or
# http://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at docs/cddl1.txt.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
# 16-Feb-2012 Brendan Gregg Created this.
use strict;
# Folded stack string => accumulated count for that stack.
my %collapsed;

# Add $count to the running total kept for the folded stack $stack.
sub remember_stack {
	my $stack = shift;
	my $count = shift;
	$collapsed{$stack} += $count;
}
# Frames of the stack currently being read, outermost first.
my @stack;

# Read line by line rather than slurping the whole input into a list first.
while (defined($_ = <>)) {
	chomp;

	# A line holding only a number ends the current stack and is its count.
	# (A single \d+ group: the former "(\d+)+" matched the same lines but
	# nested two quantifiers for no benefit.)
	if (m/^\s*(\d+)$/) {
		remember_stack(join(";", @stack), $1);
		@stack = ();
		next;
	}

	next if (m/^\s*$/);

	# Reduce "0xaddr : func+0xoff/0xlen [module]" to just "func".
	my $frame = $_;
	$frame =~ s/^\s*//;
	$frame =~ s/\+[^+]*$//;
	$frame =~ s/.* : //;
	$frame = "-" if $frame eq "";
	# Input lists the innermost frame first, so prepend to get root-first.
	unshift @stack, $frame;
}
# Emit every folded stack with its total, ordered by stack string.
# print is used rather than printf: the stack text is data, not a format
# string, and a "%" inside a frame name must be written out verbatim.
foreach my $k (sort { $a cmp $b } keys %collapsed) {
	print "$k $collapsed{$k}\n";
}

View File

@@ -0,0 +1,98 @@
#!/usr/bin/perl -w
#
# stackcollapse-vsprof.pl
#
# Parses the CSV file containing a call tree from a visual studio profiler and produces an output suitable for flamegraph.pl.
#
# USAGE: perl stackcollapse-vsprof.pl infile > outfile
#
# WORKFLOW:
#
# This example assumes you have visual studio 2015 installed.
#
# 1. Profile your C++ application using visual studio
# 2. On visual studio, choose export the call tree as csv
# 3. Generate a flamegraph: perl stackcollapse-vsprof.pl CallTreeSummary.csv | perl flamegraph.pl > result_vsprof.svg
#
# INPUT EXAMPLE :
#
# Level,Function Name,Inclusive Samples,Exclusive Samples,Inclusive Samples %,Exclusive Samples %,Module Name,
# 1,"main","8,735",0,100.00,0.00,"an_executable.exe",
# 2,"testing::UnitTest::Run","8,735",0,100.00,0.00,"an_executable.exe",
# 3,"boost::trim_end_iter_select<std::iterator<std::val<std::types<char> > >,boost::is_classifiedF>",306,16,3.50,0.18,"an_executable.exe",
#
# OUTPUT EXAMPLE :
#
# main;testing::UnitTest::Run;boost::trim_end_iter_select<std::iterator<std::val<std::types<char>>>,boost::is_classifiedF> 306
use strict;
# Forward declarations for the helpers defined at the end of the script.
sub massage_function_names;
sub parse_integer;
sub print_stack_trace;

# data initialization
my @stack = ();            # function names of the current call path, root first
my $line_number = 0;       # 1-based number of the input line being processed
my $previous_samples = 0;  # sample count of the most recently pushed entry

# Exactly one argument (the CSV file) is required. The usage text goes to
# STDERR because STDOUT is normally redirected into the output file, and the
# script exits non-zero so callers can detect the misuse.
my $num_args = $#ARGV + 1;
if ($num_args != 1) {
	print STDERR "Usage : stackcollapse-vsprof.pl <in.csv> > out.txt\n";
	exit 1;
}
my $input_csv_file = $ARGV[0];

# Captures the first three CSV columns of a call-tree row.
my $line_parser_rx = qr{
	^\s*(\d+?), # level in the stack
	("[^"]+" | [^,]+), # function name (beware of spaces)
	("[^"]+" | [^,]+), # number of samples (beware of locale number formatting)
}ox;

open(my $fh, '<', $input_csv_file) or die "Can't read file '$input_csv_file' [$!]\n";

while (my $current_line = <$fh>){
	++$line_number;

	# to discard first line which typically contains headers
	next if $line_number == 1;
	next if $current_line =~ /^\s*$/o;

	my ($raw_level, $raw_function, $raw_samples) = $current_line =~ $line_parser_rx
		or die "Error in regular expression at line $line_number : $current_line\n";

	my $level = int $raw_level;
	my $function = massage_function_names($raw_function);
	my $samples = parse_integer($raw_samples);
	my $stack_len = scalar @stack;

	#print "[DEBUG] $line_number : $level $function $samples $stack_len\n";

	next unless $level;
	# A row may go at most one level deeper than the current path.
	die "Error in stack at line $line_number : $current_line\n" if $level > $stack_len + 1;

	if ($level <= $stack_len) {
		# The previous path is complete: emit it, then cut the path back
		# so that it holds $level - 1 entries.
		print_stack_trace(\@stack, $previous_samples);
		splice(@stack, $level - $stack_len - 1);
	}

	die "Stack overflow at line $line_number" if $stack_len >= 1000;
	push(@stack, $function);
	$previous_samples = $samples;
}
# Emit whatever path is left after the last row.
print_stack_trace(\@stack, $previous_samples);
# Return the given function name with all whitespace removed and a leading
# and/or trailing double quote stripped. The argument is not modified.
sub massage_function_names {
	my ($name) = @_;
	$name =~ s/\s*|^"|"$//go;
	return $name;
}
# Convert a CSV sample count to an integer after dropping dots, commas,
# spaces and a leading and/or trailing double quote. The argument is not
# modified.
sub parse_integer {
	my ($text) = @_;
	$text =~ s/[., ]|^"|"$//go;
	return int($text);
}
# Print the entries of the array referenced by $stack_ref joined with ";",
# followed by a space, $sample and a newline.
sub print_stack_trace {
	my ($stack_ref, $sample) = @_;
	print join(";", @{$stack_ref}), " $sample\n";
}

View File

@@ -0,0 +1,103 @@
#!/usr/bin/perl -w
#
# stackcollapse-vtune-mc.pl
#
# Parses the CSV file containing a call tree from Intel VTune memory-consumption profiler and produces an output suitable for flamegraph.pl.
#
# USAGE: perl stackcollapse-vtune-mc.pl [options] infile > outfile
#
# WORKFLOW:
#
# This assumes you have Intel VTune installed and on path (using Command Line)
#
# 1. Profile C++ application tachyon (example shipped with Intel VTune 2019):
#
# amplxe-cl -collect memory-consumption -r mc_tachyon -- ./tachyon
#
# 2. Export raw VTune data to csv file:
# ### for Intel VTune 2019
# amplxe-cl -R top-down -call-stack-mode all \
# -column="Allocations:Self","Allocation Size:Self","Module" \
# -report-out allocations.csv -format csv \
# -csv-delimiter comma -r mc_tachyon
#
# 3. Generate a flamegraph:
# ## Generate for allocations amount.
# perl stackcollapse-vtune-mc.pl allocations.csv > out.folded
# perl flamegraph.pl --countname=allocations out.folded > vtune_tachyon_mc.svg
#
# ## Or you can generate for allocation size in bytes.
# perl stackcollapse-vtune-mc.pl -s allocations.csv > out.folded
# perl flamegraph.pl --countname=allocations out.folded > vtune_tachyon_mc_size.svg
#
# AUTHOR: Rohith Bakkannagari
# 27-Nov-2019 UnpluggedCoder Forked from stackcollapse-vtune.pl, for memory-consumption flamegraph
use strict;
use Getopt::Long;
# Abort the script with the usage text (die writes it to STDERR and makes the
# process exit with a non-zero status).
sub usage {
    # The heredoc interpolates like a double-quoted string, so the shell
    # line-continuation backslashes must be written as "\\" to be printed.
    die <<USAGE_END;
Usage : $0 [options] allocations.csv > out.folded\n
--size # Accumulate allocation size in bytes instead of allocation counts.\n
NOTE : The csv file should be exported by the `amplxe-cl` tool with the exact -column parameter shown below.
amplxe-cl -R top-down -call-stack-mode all \\
-column="Allocations:Self","Allocation Size:Self","Module" \\
-report-out allocations.csv -format csv \\
-csv-delimiter comma -r mc_tachyon
USAGE_END
}
# data initialization
my @stack = ();         # $stack[$d] holds the most recent frame seen at depth $d
my $rowCounter = 0;     # flag for row number
my $accSize = '';       # set by --size: weight stacks by allocated bytes

GetOptions ('size' => \$accSize)
    or usage();

# Exactly one positional argument (the CSV report) is expected; usage() dies.
usage() if @ARGV != 1;

my $inputCSVFile = $ARGV[0];
open(my $fh, '<', $inputCSVFile) or die "Can't read file '$inputCSVFile' [$!]\n";

while (my $currLine = <$fh>) {
    # discard warning line
    next if $rowCounter == 0 && rindex($currLine, "war:", 0) == 0;
    $rowCounter = $rowCounter + 1;

    # to discard first row which typically contains headers
    next if $rowCounter == 1;
    chomp $currLine;

    #VTune - sometimes the call stack information is enclosed in double quotes (?). To remove double quotes.
    $currLine =~ s/\"//g;

    ### for Intel VTune 2019
    ### CSV header should be like below
    ### Function Stack,Allocation Size:Self,Deallocation Size:Self,Allocations:Self,Module
    $currLine =~ /(\s*)(.*?),([0-9]*?\.?[0-9]*?),([0-9]*?\.?[0-9]*?),([0-9]*?\.?[0-9]*?),(.*)/ or die "Error in regular expression on the current line $currLine\n";

    my $func = $2.'('.$6.')';   # function(module)
    my $depth = length ($1);    # nesting depth = number of leading whitespace characters
    my $allocBytes = $3;        # allocation size
    my $allocs = $5;            # allocations

    # Record this frame at its depth; deeper rows that follow reuse it as
    # part of their call path.
    $stack [$depth] = $func;

    my $weight;
    if ($accSize) {
        next if $allocBytes eq '';
        $weight = $allocBytes;
    } else {
        # The field may be empty; test for that first so the numeric
        # comparison does not emit a warning under -w.
        next if $allocs eq '' || $allocs == 0;
        $weight = $allocs;
    }

    # Frames 0..$depth form the call path of this row.
    print join(';', @stack[0 .. $depth]) . " $weight\n";
}
close($fh);

View File

@@ -0,0 +1,97 @@
#!/usr/bin/perl -w
#
# stackcollapse-vtune.pl
#
# Parses the CSV file containing a call tree from Intel VTune hotspots profiler and produces an output suitable for flamegraph.pl.
#
# USAGE: perl stackcollapse-vtune.pl infile > outfile
#
# WORKFLOW:
#
# This assumes you have Intel VTune installed and on path (using Command Line)
#
# 1. Profile C++ application tachyon_find_hotspots (example shipped with Intel VTune 2013):
#
# amplxe-cl -collect hotspots -r result_vtune_tachyon -- ./tachyon_find_hotspots
#
# 2. Export raw VTune data to csv file:
#
##### VTune 2013 & 2015
# amplxe-cl -R top-down -report-out result_vtune_tachyon.csv -filter "Function Stack" -format csv -csv-delimiter comma -r result_vtune_tachyon
#### VTune 2016
# amplxe-cl.exe -R top-down -call-stack-mode all -column="CPU Time:Self","Module" -report-output result_vtune_tachyon.csv -filter "Function Stack" -format csv -csv-delimiter comma -r result_vtune_tachyon
#
# 3. Generate a flamegraph:
#
# perl stackcollapse-vtune.pl result_vtune_tachyon.csv | perl flamegraph.pl > result_vtune_tachyon.svg
#
# AUTHOR: Rohith Bakkannagari
use strict;

# data initialization
my @stack = ();         # $stack[$d] holds the most recent frame seen at depth $d
my $rowCounter = 0;     #flag for row number

# Exactly one argument is required: the CSV file to convert.  The usage text
# goes to STDERR with a non-zero status so that it never ends up inside a
# redirected output file.
if (@ARGV != 1) {
    print STDERR "Usage : stackcollapse-vtune.pl <out.csv> > out.txt\n";
    exit 1;
}

my $inputCSVFile = $ARGV[0];
my $funcOnly = '';
my $func = '';          # must be declared: the script runs under "use strict"
my $depth = 0;
my $selfTime = 0;
my $dllName = '';

open(my $fh, '<', $inputCSVFile) or die "Can't read file '$inputCSVFile' [$!]\n";

while (my $currLine = <$fh>) {
    $rowCounter = $rowCounter + 1;

    # to discard first row which typically contains headers
    next if $rowCounter == 1;
    chomp $currLine;

    ### VTune 2013 & 2015
    #VTune - sometimes the call stack information is enclosed in double quotes (?). To remove double quotes. Not necessary for XCode instruments (MAC)
    $currLine =~ s/\"//g;
    $currLine =~ /(\s*)(.*),(.*),.*,([0-9]*\.?[0-9]+)/ or die "Error in regular expression on the current line\n";
    $dllName = $3;
    $func = $dllName.'!'.$2; # Eg : m_lxe.dll!MathWorks::lxe::IrEngineDecorator::Apply
    $depth = length ($1);    # nesting depth = number of leading whitespace characters
    $selfTime = $4*1000; # selfTime in msec
    ### VTune 2013 & 2015

    ### VTune 2016
    # $currLine =~ /(\s*)(.*?),([0-9]*\.?[0-9]+?),(.*)/ or die "Error in regular expression on the current line $currLine\n";
    # if ($2 =~ /\"/)
    # {
    # $currLine =~ /(\s*)\"(.*?)\",([0-9]*\.?[0-9]+?),(.*)/ or die "Error in regular expression on the current line $currLine\n";
    # $funcOnly = $2;
    # $depth = length ($1);
    # $selfTime = $3*1000; # selfTime in msec
    # $dllName = $4;
    # }
    # else
    # {
    # $funcOnly = $2;
    # $depth = length ($1);
    # $selfTime = $3*1000; # selfTime in msec
    # $dllName = $4;
    # }
    # my $func = $dllName.'!'.$funcOnly; # Eg : m_lxe.dll!MathWorks::lxe::IrEngineDecorator::Apply
    ### VTune 2016

    # Record this frame at its depth; frames 0..$depth form the call path.
    $stack [$depth] = $func;

    # Rows without self time contribute nothing and are not printed.
    if ($selfTime != 0) {
        print join(';', @stack[0 .. $depth]) . " $selfTime\n";
    }
}
close($fh);

View File

@@ -0,0 +1,69 @@
#!/usr/bin/perl -ws
#
# stackcollapse-wcp Collapse wallClockProfiler backtraces
#
# Parse a list of GDB backtraces as generated by https://github.com/jasonrohrer/wallClockProfiler
#
# Copyright 2014 Gabriel Corona. All rights reserved.
# Portions Copyright 2020 Ștefan Talpalaru <stefantalpalaru@yahoo.com>
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at docs/cddl1.txt or
# http://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at docs/cddl1.txt.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
use strict;

my %sample_totals;          # folded stack => accumulated sample count
my $pending_stack = "";     # stack currently being assembled, outermost frame first
my $in_full_stacks = 0;     # becomes true once the "Full stacks" heading was seen
my $sample_count = 0;       # sample count announced for the stack being read

while (my $line = <>) {
    # trim surrounding whitespace (including the line terminator)
    $line =~ s/^\s+|\s+$//g;

    if ($line =~ m/^Full stacks/) {
        $in_full_stacks = 1;
        next;
    }

    # everything before the "Full stacks" heading is ignored
    next unless $in_full_stacks;

    if ($line =~ m/^\d+\.\d+% =+ \((\d+) samples\)/) {
        # 99.791% ===================================== (17194 samples)
        $sample_count = $1;
    } elsif ($line =~ m/^\d+: (.*)$/) {
        # 1: poll__YNjd8fE6xG8CRNwfLnrx0g_2 (at /mnt/sde1/storage/nim-beacon-chain-clean/vendor/nim-chronos/chronos/asyncloop.nim:343)
        # Each new frame is prepended, so the first frame listed ends up last.
        my $frame = $1;
        $pending_stack = ($pending_stack eq "")
            ? $frame
            : $frame . ";" . $pending_stack;
    } elsif ($line =~ m/^$/ and $pending_stack ne "") {
        # an empty line closes the stack that was being assembled
        $sample_totals{$pending_stack} += $sample_count;
        $pending_stack = "";
    }
}

for my $folded (sort { $a cmp $b } keys %sample_totals) {
    print "$folded $sample_totals{$folded}\n";
}

View File

@@ -0,0 +1,197 @@
#!/usr/bin/php
<?php
#
# Copyright 2018 Miriam Lauter (lauter.miriam@gmail.com). All rights reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# (http://www.gnu.org/copyleft/gpl.html)
#
# 13-Apr-2018 Miriam Lauter Created this.
// Clear the error_log setting for this run.
// NOTE(review): presumably so that PHP diagnostics are not written to a configured log file - confirm.
ini_set('error_log', null);
// Parse the -h, -t and -c flags plus --help; getopt() stores in $optind the
// index of the first argument that is not an option.
$optind = null;
$args = getopt("htc", ["help"], $optind);
// -h / --help: print the help text and exit (usage() exits with status 0 by default).
if (isset($args['h']) || isset($args['help'])) {
usage();
}
/**
 * Print the help text to standard output and terminate the script.
 *
 * @param int $exit Status passed to exit(); 0 by default.
 */
function usage($exit = 0) {
    echo <<<EOT
stackcollapse-xdebug.php collapse php function traces into single lines.
Parses php samples generated by xdebug with xdebug.trace_format = 1
and outputs stacks as single lines, with methods separated by semicolons,
and then a space and an occurrence count. For use with flamegraph.pl.
See https://github.com/brendangregg/FlameGraph.
USAGE: ./stackcollapse-xdebug.php [OPTIONS] infile > outfile
-h --help Show this message
-t Weight stack counts by duration using the time index in the trace (default)
-c Invocation counts only. Simply count stacks in the trace and sum duplicates, don't weight by duration.
Example input:
For more info on xdebug and generating traces see
https://xdebug.org/docs/execution_trace.
Version: 2.0.0RC4-dev
TRACE START [2007-05-06 18:29:01]
1 0 0 0.010870 114112 {main} 1 ../trace.php 0
2 1 0 0.032009 114272 str_split 0 ../trace.php 8
2 1 1 0.032073 116632
2 2 0 0.033505 117424 ret_ord 1 ../trace.php 10
3 3 0 0.033531 117584 ord 0 ../trace.php 5
3 3 1 0.033551 117584
...
TRACE END [2007-05-06 18:29:01]
Example output:
-c
{main};str_split 1
{main};ret_ord;ord 6
-t
{main} 23381
{main};str_split 64
{main};ret_ord 215
{main};ret_ord;ord 106
EOT;
    exit($exit);
}
/**
 * Build a folded stack string from a list of frame records.
 *
 * @param array  $stack         List of arrays, one per frame.
 * @param string $func_name_key Key under which each frame stores its function name.
 * @return string The function names joined with ";".
 */
function collapseStack(array $stack, string $func_name_key): string {
    $names = array_column($stack, $func_name_key);

    return implode(';', $names);
}
/**
 * Credit a duration to the given call stack in the accumulator.
 *
 * @param array $stack  Function names, outermost first.
 * @param float $dur    Duration to credit; it is multiplied by SCALE_FACTOR.
 * @param array $stacks Accumulator (folded stack => scaled total), updated in place.
 */
function addCurrentStackToStacks(array $stack, float $dur, array &$stacks) {
    $key = implode(';', $stack);
    $weight = SCALE_FACTOR * $dur;

    // A stack seen for the first time starts from zero.
    $stacks[$key] = ($stacks[$key] ?? 0) + $weight;
}
/**
 * Tell whether a trace line marks the end of the trace records: it either
 * starts with a tab character or starts with "TRACE END".
 *
 * @param string $l One line of the trace file.
 * @return int|false The preg_match() result (1 on match, 0 otherwise).
 */
function isEOTrace(string $l) {
    return preg_match("/^(\\t|TRACE END)/", $l);
}
// The trace file is the first argument after the options ($optind was filled in by getopt()).
$filename = $argv[$optind] ?? null;
if ($filename === null) {
usage(1);
}
// Without -c the stacks are weighted by duration; with -c they are only counted.
$do_time = !isset($args['c']);
// First make sure our file is consistently formatted with only one \t delimiting each field
// NOTE(review): this rewrites the input file in place through an external sed.
// GNU sed appears to read `-in` as -i with backup suffix "n" - confirm that is intended.
$out = [];
$retval = null;
exec("sed -in 's/\t\+/\t/g' " . escapeshellarg($filename), $out, $retval);
if ($retval !== 0) {
usage(1);
}
$handle = fopen($filename, 'r');
if ($handle === false) {
echo "Unable to open $filename \n\n";
usage(1);
}
// Loop till we find TRACE START
while ($l = fgets($handle)) {
if (strpos($l, "TRACE START") === 0) {
break;
}
}
// Multiplier applied to every duration by addCurrentStackToStacks().
const SCALE_FACTOR = 1000000;
// Folded stack => accumulated weight (time mode) or occurrence count (count mode).
$stacks = [];
// Function names of the calls currently open, outermost first.
$current_stack = [];
// Count mode only: true when the previous record was a function exit.
$was_exit = false;
// Time mode only: time index of the previous record.
$prev_start_time = 0;
if ($do_time) {
// Weight counts by duration
// Xdebug trace time indices have 6 sigfigs of precision
// We have a perfect trace, but let's instead pretend that
// this was collected by sampling at 10^6 Hz
// then each millionth of a second this stack took to execute is 1 count
while ($l = fgets($handle)) {
if (isEOTrace($l)) {
break;
}
// Fields are tab separated: level, function number, exit flag, time index, ...
$parts = explode("\t", $l);
list($level, $fn_no, $is_exit, $time) = $parts;
if ($is_exit) {
if (empty($current_stack)) {
echo "[WARNING] Found function exit without corresponding entrance. Discarding line. Check your input.\n";
continue;
}
// The time elapsed since the previous record belongs to the stack that is
// still current; credit it before removing the exiting function.
addCurrentStackToStacks($current_stack, $time - $prev_start_time, $stacks);
array_pop($current_stack);
} else {
// Entry records carry the function name in the sixth field.
$func_name = $parts[5];
// Time elapsed since the previous record is credited to the caller's stack
// before the callee is pushed.
if (!empty($current_stack)) {
addCurrentStackToStacks($current_stack, $time - $prev_start_time, $stacks);
}
$current_stack[] = $func_name;
}
$prev_start_time = $time;
}
} else {
// Counts only
while ($l = fgets($handle)) {
if (isEOTrace($l)) {
break;
}
$parts = explode("\t", $l);
list($level, $fn_no, $is_exit) = $parts;
if ($is_exit === "1") {
// Only an exit that directly follows an entry counts the stack; an exit
// following another exit just unwinds one level.
if (!$was_exit) {
$collapsed = implode(";", $current_stack);
if (array_key_exists($collapsed, $stacks)) {
$stacks[$collapsed]++;
} else {
$stacks[$collapsed] = 1;
}
}
array_pop($current_stack);
$was_exit = true;
} else {
$func_name = $parts[5];
$current_stack[] = $func_name;
$was_exit = false;
}
}
}
// Emit one "stack count" line per distinct stack, in first-seen order.
foreach ($stacks as $stack => $count) {
echo "$stack $count\n";
}

View File

@@ -0,0 +1,109 @@
#!/usr/bin/perl -w
#
# stackcollapse.pl collapse multiline stacks into single lines.
#
# Parses a multiline stack followed by a number on a separate line, and
# outputs a semicolon separated stack followed by a space and the number.
# If memory addresses (+0xd) are present, they are stripped, and resulting
# identical stacks are coalesced with their counts summed.
#
# USAGE: ./stackcollapse.pl infile > outfile
#
# Example input:
#
# unix`i86_mwait+0xd
# unix`cpu_idle_mwait+0xf1
# unix`idle+0x114
# unix`thread_start+0x8
# 1641
#
# Example output:
#
# unix`thread_start;unix`idle;unix`cpu_idle_mwait;unix`i86_mwait 1641
#
# Input may contain many stacks, and can be generated using DTrace. The
# first few lines of input are skipped (see $headerlines).
#
# Copyright 2011 Joyent, Inc. All rights reserved.
# Copyright 2011 Brendan Gregg. All rights reserved.
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at docs/cddl1.txt or
# http://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at docs/cddl1.txt.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
# 14-Aug-2011 Brendan Gregg Created this.
use strict;

my $headerlines = 3;        # number of input lines to skip
my $includeoffset = 0;      # include function offset (except leafs)

my %collapsed;              # folded stack => summed count

# Add $count to the running total kept for the folded $stack.
sub remember_stack {
    my ($stack, $count) = @_;
    $collapsed{$stack} += $count;
}

my $nr = 0;                 # number of input lines read so far
my @stack;                  # frames of the stack being read, outermost caller first

# Read the input line by line instead of loading all of it into memory.
while (my $line = <>) {
    next if $nr++ < $headerlines;
    chomp $line;

    # A line holding only a number terminates the current stack and gives
    # its count.
    if ($line =~ m/^\s*(\d+)$/) {
        my $count = $1;
        my $joined = join(";", @stack);

        # trim leaf offset if these were retained:
        $joined =~ s/\+[^+]*$// if $includeoffset;

        remember_stack($joined, $count);
        @stack = ();
        next;
    }

    next if ($line =~ m/^\s*$/);

    my $frame = $line;
    $frame =~ s/^\s*//;
    $frame =~ s/\+[^+]*$// unless $includeoffset;

    # Remove arguments from C++ function names:
    $frame =~ s/(::.*)[(<].*/$1/;

    $frame = "-" if $frame eq "";

    # A frame may list several functions separated by "->"; every function
    # after the first is tagged as inlined.
    my @inline;
    for my $part (split /\->/, $frame) {
        my $func = $part;

        # Strip out L and ; included in java stacks
        $func =~ tr/\;/:/;
        $func =~ s/^L//;
        $func .= "_[i]" if scalar(@inline) > 0; #inlined

        push @inline, $func;
    }

    # The input lists the innermost frame first, so each new frame goes to
    # the front of the stack.
    unshift @stack, @inline;
}

foreach my $k (sort { $a cmp $b } keys %collapsed) {
    print "$k $collapsed{$k}\n";
}

View File

@@ -0,0 +1,26 @@
#!/bin/bash
#
# test.sh - Check flame graph software vs test result files.
#
# This is used to detect regressions in the flame graph software.
# See record-test.sh, which refreshes these files after intended software
# changes.
#
# Currently only tests stackcollapse-perf.pl.
# Stop on the first failing command, on use of an unset variable, and on a
# failure in any stage of a pipeline.
set -euo pipefail
# Trace every command as it runs and echo script lines as they are read.
set -x
set -v
# ToDo: add some form of --inline, and --inline --context tests. These are
# tricky since they use addr2line, whose output will vary based on the test
# system's binaries and symbol tables.
# Run every listed stackcollapse-perf.pl option against every test/*.txt input.
for opt in pid tid kernel jit all addrs; do
for testfile in test/*.txt ; do
echo testing $testfile : $opt
# Map test/NAME.txt to the recorded result test/results/NAME-collapsed-OPT.txt.
outfile=${testfile#*/}
outfile=test/results/${outfile%.txt}"-collapsed-${opt}.txt"
# The freshly collapsed output must be identical to the recorded result;
# diff exits non-zero on any difference, which aborts the script.
perl ./stackcollapse-perf.pl --"${opt}" "${testfile}" 2> /dev/null | diff -u - "${outfile}"
# The recorded result must also be accepted by flamegraph.pl.
perl ./flamegraph.pl "${outfile}" > /dev/null
done
done

View File

@@ -6,13 +6,14 @@ set -eo pipefail
# parse the command line
#
usage() { echo "usage: $(basename "$0") [--cli <path>] [--name <cli-name>] [--baseline-cli <path>] [--suite <suite>] [--json <path>] [--zip <path>] [--verbose] [--debug]"; }
usage() { echo "usage: $(basename "$0") [--cli <path>] [--name <cli-name>] [--baseline-cli <path>] [--suite <suite>] [--json <path>] [--flamegraph] [--zip <path>] [--verbose] [--debug]"; }
TEST_CLI="git"
TEST_CLI_NAME=
BASELINE_CLI=
SUITE=
JSON_RESULT=
FLAMEGRAPH=
ZIP_RESULT=
OUTPUT_DIR=
VERBOSE=
@@ -66,6 +67,8 @@ for a in "$@"; do
NEXT="json"
elif [[ "${a}" == "-j"* ]]; then
JSON_RESULT="${a/-j/}"
elif [ "${a}" = "-F" ] || [ "${a}" == "--flamegraph" ]; then
FLAMEGRAPH=1
elif [ "${a}" = "-z" ] || [ "${a}" == "--zip" ]; then
NEXT="zip"
elif [[ "${a}" == "-z"* ]]; then
@@ -209,8 +212,9 @@ for TEST_PATH in "${BENCHMARK_DIR}"/*; do
fi
OUTPUT_FILE="${OUTPUT_DIR}/${TEST_FILE}.out"
JSON_FILE="${OUTPUT_DIR}/${TEST_FILE}.json"
ERROR_FILE="${OUTPUT_DIR}/${TEST_FILE}.err"
JSON_FILE="${OUTPUT_DIR}/${TEST_FILE}.json"
FLAMEGRAPH_FILE="${OUTPUT_DIR}/${TEST_FILE}.svg"
FAILED=
{
@@ -248,15 +252,54 @@ for TEST_PATH in "${BENCHMARK_DIR}"/*; do
two_mean=$(humanize_secs "${two_mean}")
two_stddev=$(humanize_secs "${two_stddev}")
echo "${one_mean} ± ${one_stddev} vs ${two_mean} ± ${two_stddev}"
echo -n "${one_mean} ± ${one_stddev} vs ${two_mean} ± ${two_stddev}"
else
echo "${one_mean} ± ${one_stddev}"
echo -n "${one_mean} ± ${one_stddev}"
fi
done
fi
# add our metadata to the hyperfine json result
jq ". |= { \"name\": \"${TEST_NAME}\" } + ." < "${JSON_FILE}" > "${JSON_FILE}.new" && mv "${JSON_FILE}.new" "${JSON_FILE}"
# run with flamegraph output if requested
if [ "${FLAMEGRAPH}" ]; then
PROFILER_OUTPUT_FILE="${OUTPUT_DIR}/${TEST_FILE}-profiler.out"
PROFILER_ERROR_FILE="${OUTPUT_DIR}/${TEST_FILE}-profiler.err"
if [ "${VERBOSE}" = "1" ]; then
echo " Profiling and creating flamegraph ..."
else
echo -n " -- profiling..."
fi
RESULT=
{ ${TEST_PATH} --cli "${TEST_CLI}" --profile --flamegraph "${FLAMEGRAPH_FILE}" >>"${PROFILER_OUTPUT_FILE}" 2>>"${PROFILER_ERROR_FILE}" || RESULT=$?; }
if [ "${VERBOSE}" = "1" ]; then
indent < "${PROFILER_OUTPUT_FILE}"
indent < "${PROFILER_ERROR_FILE}"
else
# error code 2 indicates a non-fatal error creating
# the flamegraph
if [ "${RESULT}" = "" -o "${RESULT}" = "0" ]; then
echo " done."
elif [ "${RESULT}" = "2" ]; then
echo " missing resources."
elif [ "${RESULT}" = "3" ]; then
echo " sample too small."
indent < "${PROFILER_ERROR_FILE}"
elif [ "${RESULT}" = "4" ]; then
echo " unavailable."
else
echo " failed."
indent < "${PROFILER_ERROR_FILE}"
ANY_FAILED=1
fi
fi
fi
done
TIME_END=$(time_in_ms)
@@ -308,6 +351,7 @@ if [ "$CLEANUP_DIR" = "1" ]; then
rm -f "${OUTPUT_DIR}"/*.out
rm -f "${OUTPUT_DIR}"/*.err
rm -f "${OUTPUT_DIR}"/*.json
rm -f "${OUTPUT_DIR}"/*.svg
rmdir "${OUTPUT_DIR}"
fi

View File

@@ -7,15 +7,17 @@ set -eo pipefail
# command-line parsing
#
usage() { echo "usage: $(basename "$0") [--cli <path>] [--baseline-cli <path>] [--output-style <style>] [--json <path>]"; }
usage() { echo "usage: $(basename "$0") [--cli <path>] [--baseline-cli <path>] [--output-style <style>] [--json <path>] [--profile] [--flamegraph <path>]"; }
NEXT=
BASELINE_CLI=
TEST_CLI="git"
JSON=
SHOW_OUTPUT=
JSON=
PROFILE=
FLAMEGRAPH=
if [ "$CI" != "" ]; then
if [ "$CI" != "" -a -t 1 ]; then
OUTPUT_STYLE="color"
else
OUTPUT_STYLE="auto"
@@ -23,6 +25,9 @@ fi
HELP_GIT_REMOTE="https://github.com/git/git"
HELP_LINUX_REMOTE="https://github.com/torvalds/linux"
HELP_RESOURCE_REPO="https://github.com/libgit2/benchmark-resources"
BENCHMARK_DIR=${BENCHMARK_DIR:=$(dirname "$0")}
#
# parse the arguments to the outer script that's including us; these are arguments that
@@ -42,6 +47,9 @@ for a in "$@"; do
elif [ "${NEXT}" = "json" ]; then
JSON="${a}"
NEXT=
elif [ "${NEXT}" = "flamegraph" ]; then
FLAMEGRAPH="${a}"
NEXT=
elif [ "${a}" = "-c" ] || [ "${a}" = "--cli" ]; then
NEXT="cli"
elif [[ "${a}" == "-c"* ]]; then
@@ -52,13 +60,19 @@ for a in "$@"; do
BASELINE_CLI="${a/-b/}"
elif [ "${a}" == "--output-style" ]; then
NEXT="output-style"
elif [ "${a}" = "-j" ] || [ "${a}" = "--json" ]; then
NEXT="json"
elif [[ "${a}" == "-j"* ]]; then
JSON="${a}"
elif [ "${a}" = "--show-output" ]; then
SHOW_OUTPUT=1
OUTPUT_STYLE=
elif [ "${a}" = "-j" ] || [ "${a}" = "--json" ]; then
NEXT="json"
elif [[ "${a}" == "-j"* ]]; then
JSON="${a/-j/}"
elif [ "${a}" = "-p" ] || [ "${a}" = "--profile" ]; then
PROFILE=1
elif [ "${a}" = "-F" ] || [ "${a}" = "--flamegraph" ]; then
NEXT="flamegraph"
elif [[ "${a}" == "-F"* ]]; then
FLAMEGRAPH="${a/-F/}"
else
echo "$(basename "$0"): unknown option: ${a}" 1>&2
usage 1>&2
@@ -99,7 +113,7 @@ temp_dir() {
fi
}
create_preparescript() {
create_prepare_script() {
# add some functions for users to use in preparation
cat >> "${SANDBOX_DIR}/prepare.sh" << EOF
set -e
@@ -205,6 +219,30 @@ create_preparescript() {
cp -R "\${RESOURCES_DIR}/\${RESOURCE}" "\${SANDBOX_DIR}/"
}
sandbox_resource() {
RESOURCE="\${1}"
if [ "\${RESOURCE}" = "" ]; then
echo "usage: sandbox_resource <path>" 1>&2
exit 1
fi
RESOURCE_UPPER=\$(echo "\${RESOURCE}" | tr '[:lower:]' '[:upper:]' | sed -e "s/-/_/g")
RESOURCE_PATH=\$(eval echo "\\\${BENCHMARK_\${RESOURCE_UPPER}_PATH}")
if [ "\${RESOURCE_PATH}" = "" -a "\${BENCHMARK_RESOURCES_PATH}" != "" ]; then
RESOURCE_PATH="\${BENCHMARK_RESOURCES_PATH}/\${RESOURCE}"
fi
if [ ! -f "\${RESOURCE_PATH}" ]; then
echo "sandbox: the resource \"\${RESOURCE}\" does not exist"
exit 1
fi
rm -rf "\${SANDBOX_DIR:?}/\${RESOURCE}"
cp -R "\${RESOURCE_PATH}" "\${SANDBOX_DIR}/\${RESOURCE}"
}
sandbox_repo() {
RESOURCE="\${1}"
@@ -229,8 +267,8 @@ create_preparescript() {
exit 1
fi
REPO_UPPER=\$(echo "\${1}" | tr '[:lower:]' '[:upper:]')
REPO_URL=\$(eval echo "\\\${BENCHMARK_\${REPO_UPPER}_REPOSITORY}")
REPO_UPPER=\$(echo "\${REPO}" | tr '[:lower:]' '[:upper:]')
REPO_URL=\$(eval echo "\\\${BENCHMARK_\${REPO_UPPER}_PATH}")
if [ "\${REPO_URL}" = "" ]; then
echo "\$0: unknown repository '\${REPO}'" 1>&2
@@ -252,10 +290,7 @@ EOF
echo "${SANDBOX_DIR}/prepare.sh"
}
create_runscript() {
SCRIPT_NAME="${1}"; shift
CLI_PATH="${1}"; shift
start_dir() {
if [[ "${CHDIR}" = "/"* ]]; then
START_DIR="${CHDIR}"
elif [ "${CHDIR}" != "" ]; then
@@ -264,6 +299,15 @@ create_runscript() {
START_DIR="${SANDBOX_DIR}"
fi
echo "${START_DIR}"
}
create_run_script() {
SCRIPT_NAME="${1}"; shift
CLI_PATH="${1}"; shift
START_DIR=$(start_dir)
# our run script starts by chdir'ing to the sandbox or repository directory
echo -n "cd \"${START_DIR}\" && \"${CLI_PATH}\"" >> "${SANDBOX_DIR}/${SCRIPT_NAME}.sh"
@@ -271,16 +315,12 @@ create_runscript() {
echo -n " \"${a}\"" >> "${SANDBOX_DIR}/${SCRIPT_NAME}.sh"
done
echo "" >> "${SANDBOX_DIR}/${SCRIPT_NAME}.sh"
echo "${SANDBOX_DIR}/${SCRIPT_NAME}.sh"
}
gitbench_usage() { echo "usage: gitbench command..."; }
#
# this is the function that the outer script calls to actually do the sandboxing and
# invocation of hyperfine.
#
gitbench() {
parse_arguments() {
NEXT=
# this test should run the given command in preparation of the tests
@@ -336,34 +376,92 @@ gitbench() {
exit 1
fi
# sanity check
echo "PREPARE=\"${PREPARE}\""
echo "CHDIR=\"${CHDIR}\""
echo "WARMUP=\"${WARMUP}\""
for a in "${SANDBOX[@]}"; do
if [ ! -d "$(resources_dir)/${a}" ]; then
echo "$0: no resource '${a}' found" 1>&2
exit 1
fi
echo -n "GIT_ARGUMENTS=("
for arg in $@; do
echo -n " \"${arg}\""
done
echo " )"
}
if [ "$REPOSITORY" != "" ]; then
if [ ! -d "$(resources_dir)/${REPOSITORY}" ]; then
echo "$0: no repository resource '${REPOSITORY}' found" 1>&2
exit 1
fi
gitbench_usage() { echo "usage: gitbench command..."; }
exec_profiler() {
if [ "${BASELINE_CLI}" != "" ]; then
echo "$0: baseline is not supported in profiling mode" 1>&2
exit 1
fi
# set up our sandboxing
if [ "${SHOW_OUTPUT}" != "" ]; then
echo "$0: show-output is not supported in profiling mode" 1>&2
exit 1
fi
SANDBOX_DIR="$(temp_dir)"
if [ "$JSON" != "" ]; then
echo "$0: json is not supported in profiling mode" 1>&2
exit 1
fi
SYSTEM=$(uname -s)
TEST_CLI_PATH=$(fullpath "${TEST_CLI}")
START_DIR=$(start_dir)
if [ "${SYSTEM}" = "Linux" ]; then
if [ "${OUTPUT_STYLE}" = "color" ]; then
COLOR_ARG="always"
elif [ "${OUTPUT_STYLE}" = "none" ]; then
COLOR_ARG="never"
elif [ "${OUTPUT_STYLE}" = "auto" ]; then
COLOR_ARG="auto"
else
echo "$0: unknown output-style option" 1>&2
exit 1
fi
bash "${PREPARE_SCRIPT}"
( cd "${START_DIR}" && perf record -F 999 -a -g -o "${SANDBOX_DIR}/perf.data" -- "${TEST_CLI_PATH}" "${GIT_ARGUMENTS[@]}" )
# we may not have samples if the process exited quickly
SAMPLES=$(perf report -D -i "${SANDBOX_DIR}/perf.data" | { grep "RECORD_SAMPLE" || test $? = 1; } | wc -l)
if [ "${SAMPLES}" = "0" ]; then
echo "$0: no profiling samples created" 1>&2
exit 3
fi
if [ "${FLAMEGRAPH}" = "" ]; then
perf report --stdio --stdio-color "${COLOR_ARG}" -i "${SANDBOX_DIR}/perf.data"
else
perf script -i "${SANDBOX_DIR}/perf.data" | "${BENCHMARK_DIR}/_script/flamegraph/stackcollapse-perf.pl" > "${SANDBOX_DIR}/perf.data.folded"
perl "${BENCHMARK_DIR}/_script/flamegraph/flamegraph.pl" "${SANDBOX_DIR}/perf.data.folded" > "${FLAMEGRAPH}"
fi
else
# macos - requires system integrity protection is disabled :(
# dtrace -s "bash ${TEST_RUN_SCRIPT}" -o filename -n "profile-997 /execname == \"${TEST_CLI}\"/ { @[ustack(100)] = count(); }"
echo "$0: profiling is not supported on ${SYSTEM}" 1>&2
exit 4
fi
}
exec_hyperfine() {
if [ "$FLAMEGRAPH" != "" ]; then
echo "$0: flamegraph is not supported in standard mode" 1>&2
exit 1
fi
if [ "${BASELINE_CLI}" != "" ]; then
BASELINE_CLI_PATH=$(fullpath "${BASELINE_CLI}")
BASELINE_RUN_SCRIPT=$(create_runscript "baseline" "${BASELINE_CLI_PATH}" "$@")
BASELINE_RUN_SCRIPT=$(create_run_script "baseline" "${BASELINE_CLI_PATH}" "${GIT_ARGUMENTS[@]}")
fi
TEST_CLI_PATH=$(fullpath "${TEST_CLI}")
TEST_RUN_SCRIPT=$(create_runscript "test" "${TEST_CLI_PATH}" "$@")
PREPARE_SCRIPT="$(create_preparescript)"
TEST_CLI_PATH=$(fullpath "${TEST_CLI}")
TEST_RUN_SCRIPT=$(create_run_script "test" "${TEST_CLI_PATH}" "${GIT_ARGUMENTS[@]}")
ARGUMENTS=("--prepare" "bash ${PREPARE_SCRIPT}" "--warmup" "${WARMUP}")
if [ "${OUTPUT_STYLE}" != "" ]; then
@@ -379,13 +477,42 @@ gitbench() {
fi
if [ "${BASELINE_CLI}" != "" ]; then
ARGUMENTS+=("-n" "${BASELINE_CLI} $*" "bash ${BASELINE_RUN_SCRIPT}")
ARGUMENTS+=("-n" "${BASELINE_CLI} ${GIT_ARGUMENTS[*]}" "bash ${BASELINE_RUN_SCRIPT}")
fi
ARGUMENTS+=("-n" "${TEST_CLI} $*" "bash ${TEST_RUN_SCRIPT}")
ARGUMENTS+=("-n" "${TEST_CLI} ${GIT_ARGUMENTS[*]}" "bash ${TEST_RUN_SCRIPT}")
hyperfine "${ARGUMENTS[@]}"
rm -rf "${SANDBOX_DIR:?}"
}
#
# this is the function that the outer script calls to actually do the sandboxing and
# invocation of hyperfine.
#
gitbench() {
	# parse_arguments prints shell assignments (PREPARE, CHDIR, WARMUP and
	# the GIT_ARGUMENTS array) on stdout.  The command substitution is
	# quoted so that the text reaches eval unchanged: unquoted, it would
	# first be word-split and glob-expanded by the shell.
	eval "$(parse_arguments "$@")"

	# sanity check
	for a in "${SANDBOX[@]}"; do
		if [ ! -d "$(resources_dir)/${a}" ]; then
			echo "$0: no resource '${a}' found" 1>&2
			exit 1
		fi
	done

	# set up our sandboxing
	SANDBOX_DIR="$(temp_dir)"
	PREPARE_SCRIPT="$(create_prepare_script)"

	# --profile selects the profiler; otherwise the command is timed with
	# hyperfine.
	if [ "${PROFILE}" != "" ]; then
		exec_profiler
	else
		exec_hyperfine
	fi

	# rm -rf "${SANDBOX_DIR:?}"
}
# helper script to give useful error messages about configuration
@@ -397,17 +524,45 @@ needs_repo() {
exit 1
fi
REPO_UPPER=$(echo "${1}" | tr '[:lower:]' '[:upper:]')
REPO_URL=$(eval echo "\${BENCHMARK_${REPO_UPPER}_REPOSITORY}")
REPO_UPPER=$(echo "${REPO}" | tr '[:lower:]' '[:upper:]')
REPO_PATH=$(eval echo "\${BENCHMARK_${REPO_UPPER}_PATH}")
REPO_REMOTE_URL=$(eval echo "\${HELP_${REPO_UPPER}_REMOTE}")
if [ "${REPO_URL}" = "" ]; then
if [ "${REPO_PATH}" = "" ]; then
echo "$0: '${REPO}' repository not configured" 1>&2
echo "" 1>&2
echo "This benchmark needs an on-disk '${REPO}' repository. First, clone the" 1>&2
echo "remote repository ('${REPO_REMOTE_URL}') locally then set," 1>&2
echo "the 'BENCHMARK_${REPO_UPPER}_REPOSITORY' environment variable to the path that" 1>&2
echo "remote repository ('${REPO_REMOTE_URL}') locally then set" 1>&2
echo "the 'BENCHMARK_${REPO_UPPER}_PATH' environment variable to the path that" 1>&2
echo "contains the repository locally, then run this benchmark again." 1>&2
exit 2
fi
}
# helper script to give useful error messages about configuration
# Verify that a path is configured for the named benchmark resource and
# explain how to configure it when it is not.
#
# usage: needs_resource <resource>
#
# The path is taken from BENCHMARK_<RESOURCE>_PATH (resource name upper-cased,
# "-" replaced by "_"); when that is empty and BENCHMARK_RESOURCES_PATH is
# set, "${BENCHMARK_RESOURCES_PATH}/<resource>" is used.  The result is left
# in RESOURCE_PATH.  Exits with status 1 when no resource name is given and
# with status 2 when no path is configured.
needs_resource() {
	RESOURCE="${1}"

	if [ "${RESOURCE}" = "" ]; then
		echo "usage: needs_resource <resource>" 1>&2
		exit 1
	fi

	RESOURCE_UPPER=$(echo "${RESOURCE}" | tr '[:lower:]' '[:upper:]' | sed -e "s/-/_/g")

	# Indirect expansion returns the variable's value verbatim; going
	# through "eval echo" would collapse whitespace and expand glob
	# characters contained in the configured path.
	RESOURCE_VAR="BENCHMARK_${RESOURCE_UPPER}_PATH"
	RESOURCE_PATH="${!RESOURCE_VAR}"

	if [ "${RESOURCE_PATH}" = "" ] && [ "${BENCHMARK_RESOURCES_PATH}" != "" ]; then
		RESOURCE_PATH="${BENCHMARK_RESOURCES_PATH}/${RESOURCE}"
	fi

	if [ "${RESOURCE_PATH}" = "" ]; then
		echo "$0: '${RESOURCE}' resource path not configured" 1>&2
		echo "" 1>&2
		echo "This benchmark needs an on-disk resource named '${RESOURCE}'." 1>&2
		echo "First, clone the additional benchmark resources locally (from" 1>&2
		echo "'${HELP_RESOURCE_REPO}'), then set the" 1>&2
		echo "'BENCHMARK_RESOURCES_PATH' environment variable to the path that" 1>&2
		echo "contains the resources locally, then run this benchmark again." 1>&2
		exit 2
	fi
}

View File

@@ -0,0 +1,11 @@
#!/bin/bash -e
# Benchmark: run "index-pack" on the "packfile-250mb" resource.
# Load the shared benchmark helpers that live next to this script.
. "$(dirname "$0")/benchmark_helpers.sh"
# Stop with a configuration hint when no path is configured for the resource.
needs_resource packfile-250mb
# Preparation: create a bare repository "dest.git", copy the resource into the
# sandbox and move it to dest.git/packfile-250mb.pack.  The measured command
# is run from dest.git with a warmup count of 5.
gitbench --prepare "git init --bare dest.git && sandbox_resource packfile-250mb && mv packfile-250mb dest.git/packfile-250mb.pack" \
--warmup 5 \
--chdir "dest.git" \
-- \
index-pack packfile-250mb.pack