Merge pull request #7010 from libgit2/ethomson/index-bench

Add profiling data to benchmarks
2026-06-22 06:26:26 +00:00 · 2025-01-13 23:19:48 +00:00
parent 6bc64fb56e 02435d7647
commit 7daaf6151a
43 changed files with 54043 additions and 92 deletions
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -9,6 +9,9 @@ on:
      debug:
        type: boolean
        description: Debugging output
+      deploy:
+        type: boolean
+        description: Deploy the benchmark site
  schedule:
  - cron: '15 4 * * *'

@@ -34,16 +37,16 @@ jobs:
          setup-script: ubuntu
          env:
            CC: clang
-            CMAKE_OPTIONS: -DUSE_HTTPS=OpenSSL -DREGEX_BACKEND=builtin -DDEPRECATE_HARD=ON -DUSE_GSSAPI=ON -DBUILD_TESTS=OFF -DBUILD_EXAMPLES=OFF -DBUILD_CLI=ON -DCMAKE_BUILD_TYPE=Release
-            CMAKE_BUILD_OPTIONS: --config Release
+            CMAKE_OPTIONS: -DUSE_HTTPS=OpenSSL -DREGEX_BACKEND=builtin -DDEPRECATE_HARD=ON -DUSE_GSSAPI=ON -DBUILD_TESTS=OFF -DBUILD_EXAMPLES=OFF -DBUILD_CLI=ON -DCMAKE_BUILD_TYPE=RelWithDebInfo
+            CMAKE_BUILD_OPTIONS: --config RelWithDebInfo
        - name: "macOS"
          id: macos
          os: macos-latest
          setup-script: osx
          env:
            CC: clang
-            CMAKE_OPTIONS: -DREGEX_BACKEND=regcomp_l -DDEPRECATE_HARD=ON -DUSE_GSSAPI=ON -DBUILD_TESTS=OFF -DBUILD_EXAMPLES=OFF -DBUILD_CLI=ON -DCMAKE_BUILD_TYPE=Release
-            CMAKE_BUILD_OPTIONS: --config Release
+            CMAKE_OPTIONS: -DREGEX_BACKEND=regcomp_l -DDEPRECATE_HARD=ON -DUSE_GSSAPI=ON -DBUILD_TESTS=OFF -DBUILD_EXAMPLES=OFF -DBUILD_CLI=ON -DCMAKE_BUILD_TYPE=RelWithDebInfo
+            CMAKE_BUILD_OPTIONS: --config RelWithDebInfo
            PKG_CONFIG_PATH: /usr/local/opt/openssl/lib/pkgconfig
        - name: "Windows (amd64, Visual Studio)"
          id: windows
@@ -52,8 +55,8 @@ jobs:
          env:
            ARCH: amd64
            CMAKE_GENERATOR: Visual Studio 17 2022
-            CMAKE_OPTIONS: -A x64 -DDEPRECATE_HARD=ON -DBUILD_TESTS=OFF -DBUILD_EXAMPLES=OFF -DBUILD_CLI=ON -DCMAKE_BUILD_TYPE=Release
-            CMAKE_BUILD_OPTIONS: --config Release
+            CMAKE_OPTIONS: -A x64 -DDEPRECATE_HARD=ON -DBUILD_TESTS=OFF -DBUILD_EXAMPLES=OFF -DBUILD_CLI=ON -DCMAKE_BUILD_TYPE=RelWithDebInfo
+            CMAKE_BUILD_OPTIONS: --config RelWithDebInfo
      fail-fast: false
    name: "Benchmark ${{ matrix.platform.name }}"
    env: ${{ matrix.platform.env }}
@@ -70,9 +73,18 @@ jobs:
      if: matrix.platform.setup-script != ''
    - name: Clone resource repositories
      run: |
-        mkdir resources
+        # TODO:
+        # we need a superior way to package the benchmark resources; lfs
+        # is too expensive
+        # git lfs install
+        # git clone https://github.com/libgit2/benchmark-resources resources
+
        git clone --bare https://github.com/git/git resources/git
-        git clone --bare https://github.com/torvalds/linux resources/linux
+
+        # TODO:
+        # avoid linux temporarily; the linux blame benchmarks are simply
+        # too slow to use
+        # git clone --bare https://github.com/torvalds/linux resources/linux
    - name: Build
      run: |
        mkdir build && cd build
@@ -80,13 +92,16 @@ jobs:
      shell: bash
    - name: Benchmark
      run: |
-        export BENCHMARK_GIT_REPOSITORY="$(pwd)/resources/git"
+        # TODO:
+        # avoid benchmark resource path currently
+        #export BENCHMARK_RESOURCES_PATH="$(pwd)/resources"
+        export BENCHMARK_GIT_PATH="$(pwd)/resources/git"
        # avoid linux temporarily; the linux blame benchmarks are simply
        # too slow to use
-        # export BENCHMARK_LINUX_REPOSITORY="$(pwd)/resources/linux"
+        # export BENCHMARK_LINUX_PATH="$(pwd)/resources/linux"

        if [[ "$(uname -s)" == MINGW* ]]; then
-          GIT2_CLI="$(cygpath -w $(pwd))\\build\\Release\\git2"
+          GIT2_CLI="$(cygpath -w $(pwd))\\build\\RelWithDebInfo\\git2"
        else
          GIT2_CLI="$(pwd)/build/git2"
        fi
@@ -103,7 +118,7 @@ jobs:
        ../source/tests/benchmarks/benchmark.sh \
            ${SUITE_FLAG} ${DEBUG_FLAG} \
            --baseline-cli "git" --cli "${GIT2_CLI}" --name libgit2 \
-            --json benchmarks.json --zip benchmarks.zip
+            --json benchmarks.json --flamegraph --zip benchmarks.zip
      shell: bash
    - name: Upload results
      uses: actions/upload-artifact@v4
@@ -116,7 +131,7 @@ jobs:
  publish:
    name: Publish results
    needs: [ build ]
-    if: always() && github.repository == 'libgit2/libgit2' && github.event_name == 'schedule'
+    if: always() && github.repository == 'libgit2/libgit2'
    runs-on: ubuntu-latest
    steps:
    - name: Check out benchmark repository
@@ -128,40 +143,58 @@ jobs:
        ssh-key: ${{ secrets.BENCHMARKS_PUBLISH_KEY }}
    - name: Download test results
      uses: actions/download-artifact@v4
+    - name: Generate API
+      run: |
+        # Move today's benchmark run into the right place
+        for platform in linux macos windows; do
+          TIMESTAMP=$(jq .time.start < "benchmark-${platform}/benchmarks.json")
+          TIMESTAMP_LEN=$(echo -n ${TIMESTAMP} | wc -c | xargs)
+          DENOMINATOR=1
+          if [ "${TIMESTAMP_LEN}" = "19" ]; then
+            DENOMINATOR="1000000000"
+          elif [ "${TIMESTAMP_LEN}" = "13" ]; then
+            DENOMINATOR="1000"
+          else
+            echo "unknown timestamp"
+            exit 1
+          fi
+
+          if [[ "$(uname -s)" == "Darwin" ]]; then
+            DATE=$(date -R -r $(("${TIMESTAMP}/${DENOMINATOR}")) +"%Y-%m-%d")
+          else
+            DATE=$(date -d @$(("${TIMESTAMP}/${DENOMINATOR}")) +"%Y-%m-%d")
+          fi
+
+          # move the complete results in
+          mkdir -p "site/public/api/runs/${DATE}"
+          cp "benchmark-${platform}/benchmarks.json" "site/public/api/runs/${DATE}/${platform}.json"
+
+          # unzip the individual results
+          PLATFORM_TEMP=$(mktemp -d)
+          unzip "benchmark-${platform}/benchmarks.zip" -d "${PLATFORM_TEMP}"
+
+          mkdir -p "site/public/api/runs/${DATE}/${platform}"
+          find "${PLATFORM_TEMP}" -name \*\.svg -exec cp {} "site/public/api/runs/${DATE}/${platform}" \;
+        done
+
+        (cd site && node scripts/aggregate.js)
+      shell: bash
+
+      # in debug mode, don't deploy the site; only create a zip file and
+      # upload it for debugging
+    - name: Upload site
+      uses: actions/upload-artifact@v4
+      with:
+        name: site
+        path: site
+      if: github.event_name == 'workflow_dispatch'
    - name: Publish API
      run: |
-       # Move today's benchmark run into the right place
-       for platform in linux macos windows; do
-         TIMESTAMP=$(jq .time.start < "benchmark-${platform}/benchmarks.json")
-         TIMESTAMP_LEN=$(echo -n ${TIMESTAMP} | wc -c | xargs)
-         DENOMINATOR=1
-         if [ "${TIMESTAMP_LEN}" = "19" ]; then
-           DENOMINATOR="1000000000"
-         elif [ "${TIMESTAMP_LEN}" = "13" ]; then
-           DENOMINATOR="1000"
-         else
-           echo "unknown timestamp"
-           exit 1
-         fi
-
-         if [[ "$(uname -s)" == "Darwin" ]]; then
-           DATE=$(date -R -r $(("${TIMESTAMP}/${DENOMINATOR}")) +"%Y-%m-%d")
-         else
-           DATE=$(date -d @$(("${TIMESTAMP}/${DENOMINATOR}")) +"%Y-%m-%d")
-         fi
-
-         mkdir -p "site/public/api/runs/${DATE}"
-         cp "benchmark-${platform}/benchmarks.json" "site/public/api/runs/${DATE}/${platform}.json"
-       done
-
-       (cd site && node scripts/aggregate.js)
-
-       (
-        cd site &&
        git config user.name 'Benchmark Site Generation' &&
        git config user.email 'libgit2@users.noreply.github.com' &&
        git add . &&
        git commit --allow-empty -m"benchmark update ${DATE}" &&
        git push origin main
-       )
      shell: bash
+      working-directory: site
+      if: github.event_name == 'schedule' || github.event.inputs.deploy == 'true'
--- a/ci/setup-ubuntu-benchmark.sh
+++ b/ci/setup-ubuntu-benchmark.sh
@@ -18,3 +18,6 @@ sudo apt-get install -y --no-install-recommends \

 wget https://github.com/sharkdp/hyperfine/releases/download/v1.12.0/hyperfine_1.12.0_amd64.deb
 sudo dpkg -i hyperfine_1.12.0_amd64.deb
+
+echo -n "Setting performance events availability to: "
+echo -1 | sudo tee /proc/sys/kernel/perf_event_paranoid
--- a/tests/benchmarks/_script/flamegraph/README.md
+++ b/tests/benchmarks/_script/flamegraph/README.md
@@ -0,0 +1,226 @@
+# Flame Graphs visualize profiled code
+
+Main Website: http://www.brendangregg.com/flamegraphs.html
+
+Example (click to zoom):
+
+[![Example](http://www.brendangregg.com/FlameGraphs/cpu-bash-flamegraph.svg)](http://www.brendangregg.com/FlameGraphs/cpu-bash-flamegraph.svg)
+
+Click a box to zoom the Flame Graph to this stack frame only.
+To search and highlight all stack frames matching a regular expression, click the _search_ button in the upper right corner or press Ctrl-F.
+By default, search is case sensitive, but this can be toggled by pressing Ctrl-I or by clicking the _ic_ button in the upper right corner.
+
+Other sites:
+- The Flame Graph article in ACMQ and CACM: http://queue.acm.org/detail.cfm?id=2927301 http://cacm.acm.org/magazines/2016/6/202665-the-flame-graph/abstract
+- CPU profiling using Linux perf\_events, DTrace, SystemTap, or ktap: http://www.brendangregg.com/FlameGraphs/cpuflamegraphs.html
+- CPU profiling using XCode Instruments: http://schani.wordpress.com/2012/11/16/flame-graphs-for-instruments/  
+- CPU profiling using Xperf.exe: http://randomascii.wordpress.com/2013/03/26/summarizing-xperf-cpu-usage-with-flame-graphs/  
+- Memory profiling: http://www.brendangregg.com/FlameGraphs/memoryflamegraphs.html  
+- Other examples, updates, and news: http://www.brendangregg.com/flamegraphs.html#Updates
+
+Flame graphs can be created in three steps:
+
+1. Capture stacks
+2. Fold stacks
+3. flamegraph.pl
+
+1\. Capture stacks
+=================
+Stack samples can be captured using Linux perf\_events, FreeBSD pmcstat (hwpmc), DTrace, SystemTap, and many other profilers. See the stackcollapse-\* converters.
+
+### Linux perf\_events
+
+Using Linux perf\_events (aka "perf") to capture 60 seconds of 99 Hertz stack samples, both user- and kernel-level stacks, all processes:
+
+```
+# perf record -F 99 -a -g -- sleep 60
+# perf script > out.perf
+```
+
+Now only capturing PID 181:
+
+```
+# perf record -F 99 -p 181 -g -- sleep 60
+# perf script > out.perf
+```
+
+### DTrace
+
+Using DTrace to capture 60 seconds of kernel stacks at 997 Hertz:
+
+```
+# dtrace -x stackframes=100 -n 'profile-997 /arg0/ { @[stack()] = count(); } tick-60s { exit(0); }' -o out.kern_stacks
+```
+
+Using DTrace to capture 60 seconds of user-level stacks for PID 12345 at 97 Hertz:
+
+```
+# dtrace -x ustackframes=100 -n 'profile-97 /pid == 12345 && arg1/ { @[ustack()] = count(); } tick-60s { exit(0); }' -o out.user_stacks
+```
+
+60 seconds of user-level stacks, including time spent in-kernel, for PID 12345 at 97 Hertz:
+
+```
+# dtrace -x ustackframes=100 -n 'profile-97 /pid == 12345/ { @[ustack()] = count(); } tick-60s { exit(0); }' -o out.user_stacks
+```
+
+Switch `ustack()` for `jstack()` if the application has a ustack helper to include translated frames (eg, node.js frames; see: http://dtrace.org/blogs/dap/2012/01/05/where-does-your-node-program-spend-its-time/).  The rate for user-level stack collection is deliberately slower than kernel, which is especially important when using `jstack()` as it performs additional work to translate frames.
+
+2\. Fold stacks
+==============
+Use the stackcollapse programs to fold stack samples into single lines.  The programs provided are:
+
+- `stackcollapse.pl`: for DTrace stacks
+- `stackcollapse-perf.pl`: for Linux perf_events "perf script" output
+- `stackcollapse-pmc.pl`: for FreeBSD pmcstat -G stacks
+- `stackcollapse-stap.pl`: for SystemTap stacks
+- `stackcollapse-instruments.pl`: for XCode Instruments
+- `stackcollapse-vtune.pl`: for Intel VTune profiles
+- `stackcollapse-ljp.awk`: for Lightweight Java Profiler
+- `stackcollapse-jstack.pl`: for Java jstack(1) output
+- `stackcollapse-gdb.pl`: for gdb(1) stacks
+- `stackcollapse-go.pl`: for Golang pprof stacks
+- `stackcollapse-vsprof.pl`: for Microsoft Visual Studio profiles
+- `stackcollapse-wcp.pl`: for wallClockProfiler output
+
+Usage example:
+
+```
+For perf_events:
+$ ./stackcollapse-perf.pl out.perf > out.folded
+
+For DTrace:
+$ ./stackcollapse.pl out.kern_stacks > out.kern_folded
+```
+
+The output looks like this:
+
+```
+unix`_sys_sysenter_post_swapgs 1401
+unix`_sys_sysenter_post_swapgs;genunix`close 5
+unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf 85
+unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;c2audit`audit_closef 26
+unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;c2audit`audit_setf 5
+unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;genunix`audit_getstate 6
+unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;genunix`audit_unfalloc 2
+unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;genunix`closef 48
+[...]
+```
+
+3\. flamegraph.pl
+================
+Use flamegraph.pl to render a SVG.
+
+```
+$ ./flamegraph.pl out.kern_folded > kernel.svg
+```
+
+An advantage of having the folded input file (and why this is separate to flamegraph.pl) is that you can use grep for functions of interest. Eg:
+
+```
+$ grep cpuid out.kern_folded | ./flamegraph.pl > cpuid.svg
+```
+
+Provided Examples
+=================
+
+### Linux perf\_events
+
+An example output from Linux "perf script" is included, gzip'd, as example-perf-stacks.txt.gz. The resulting flame graph is example-perf.svg:
+
+[![Example](http://www.brendangregg.com/FlameGraphs/example-perf.svg)](http://www.brendangregg.com/FlameGraphs/example-perf.svg)
+
+You can create this using:
+
+```
+$ gunzip -c example-perf-stacks.txt.gz | ./stackcollapse-perf.pl --all | ./flamegraph.pl --color=java --hash > example-perf.svg
+```
+
+This shows my typical workflow: I'll gzip profiles on the target, then copy them to my laptop for analysis. Since I have hundreds of profiles, I leave them gzip'd!
+
+Since this profile included Java, I used the flamegraph.pl --color=java palette. I've also used stackcollapse-perf.pl --all, which includes all annotations that help flamegraph.pl use separate colors for kernel and user level code. The resulting flame graph uses: green == Java, yellow == C++, red == user-mode native, orange == kernel.
+
+This profile was from an analysis of vert.x performance. The benchmark client, wrk, is also visible in the flame graph.
+
+### DTrace
+
+An example output from DTrace is also included, example-dtrace-stacks.txt, and the resulting flame graph, example-dtrace.svg:
+
+[![Example](http://www.brendangregg.com/FlameGraphs/example-dtrace.svg)](http://www.brendangregg.com/FlameGraphs/example-dtrace.svg)
+
+You can generate this using:
+
+```
+$ ./stackcollapse.pl example-stacks.txt | ./flamegraph.pl > example.svg
+```
+
+This was from a particular performance investigation: the Flame Graph identified that CPU time was spent in the lofs module, and quantified that time.
+
+
+Options
+=======
+See the USAGE message (--help) for options:
+
+USAGE: ./flamegraph.pl [options] infile > outfile.svg
+
+	--title TEXT     # change title text
+	--subtitle TEXT  # second level title (optional)
+	--width NUM      # width of image (default 1200)
+	--height NUM     # height of each frame (default 16)
+	--minwidth NUM   # omit smaller functions. In pixels or use "%" for 
+	                 # percentage of time (default 0.1 pixels)
+	--fonttype FONT  # font type (default "Verdana")
+	--fontsize NUM   # font size (default 12)
+	--countname TEXT # count type label (default "samples")
+	--nametype TEXT  # name type label (default "Function:")
+	--colors PALETTE # set color palette. choices are: hot (default), mem,
+	                 # io, wakeup, chain, java, js, perl, red, green, blue,
+	                 # aqua, yellow, purple, orange
+	--bgcolors COLOR # set background colors. gradient choices are yellow
+	                 # (default), blue, green, grey; flat colors use "#rrggbb"
+	--hash           # colors are keyed by function name hash
+	--cp             # use consistent palette (palette.map)
+	--reverse        # generate stack-reversed flame graph
+	--inverted       # icicle graph
+	--flamechart     # produce a flame chart (sort by time, do not merge stacks)
+	--negate         # switch differential hues (blue<->red)
+	--notes TEXT     # add notes comment in SVG (for debugging)
+	--help           # this message
+
+	eg,
+	./flamegraph.pl --title="Flame Graph: malloc()" trace.txt > graph.svg
+
+As suggested in the example, flame graphs can process traces of any event,
+such as malloc()s, provided stack traces are gathered.
+
+
+Consistent Palette
+==================
+If you use the `--cp` option, it will use the $colors selection and randomly
+generate the palette like normal. Any future flamegraphs created using the `--cp`
+option will use the same palette map. Any new symbols from future flamegraphs
+will have their colors randomly generated using the $colors selection.
+
+If you don't like the palette, just delete the palette.map file.
+
+This allows your to change your colorscheme between flamegraphs to make the
+differences REALLY stand out.
+
+Example:
+
+Say we have 2 captures, one with a problem, and one when it was working
+(whatever "it" is):
+
+```
+cat working.folded | ./flamegraph.pl --cp > working.svg
+# this generates a palette.map, as per the normal random generated look.
+
+cat broken.folded | ./flamegraph.pl --cp --colors mem > broken.svg
+# this svg will use the same palette.map for the same events, but a very
+# different colorscheme for any new events.
+```
+
+Take a look at the demo directory for an example:
+
+palette-example-working.svg  
+palette-example-broken.svg
--- a/tests/benchmarks/_script/flamegraph/aix-perf.pl
+++ b/tests/benchmarks/_script/flamegraph/aix-perf.pl
@@ -0,0 +1,31 @@
+#!/usr/bin/perl
+
+use Getopt::Std;
+
+getopt('urt');
+
+unless ($opt_r && $opt_t){
+	print "Usage: $0 [ -u user] -r sample_count -t sleep_time\n";
+	exit(0);
+}
+
+my $i;
+my @proc = "";
+for ($i = 0; $i < $opt_r ; $i++){
+    if ($opt_u){
+	$proc = `/usr/sysv/bin/ps -u $opt_u `;
+	$proc =~ s/^.*\n//;
+	$proc =~ s/\s*(\d+).*\n/\1 /g;
+	@proc = split(/\s+/,$proc);
+    } else {
+	opendir(my $dh, '/proc') || die "Cant't open /proc: $!";
+	@proc = grep { /^[\d]+$/ } readdir($dh);
+	closedir ($dh);
+    }	
+
+    foreach my $pid (@proc){
+	my $command = "/usr/bin/procstack $pid";
+	print `$command 2>/dev/null`;
+    }
+    select(undef, undef, undef, $opt_t);
+}
--- a/tests/benchmarks/_script/flamegraph/difffolded.pl
+++ b/tests/benchmarks/_script/flamegraph/difffolded.pl
@@ -0,0 +1,115 @@
+#!/usr/bin/perl -w
+#
+# difffolded.pl 	diff two folded stack files. Use this for generating
+#			flame graph differentials.
+#
+# USAGE: ./difffolded.pl [-hns] folded1 folded2 | ./flamegraph.pl > diff2.svg
+#
+# Options are described in the usage message (-h).
+#
+# The flamegraph will be colored based on higher samples (red) and smaller
+# samples (blue). The frame widths will be based on the 2nd folded file.
+# This might be confusing if stack frames disappear entirely; it will make
+# the most sense to ALSO create a differential based on the 1st file widths,
+# while switching the hues; eg:
+#
+#  ./difffolded.pl folded2 folded1 | ./flamegraph.pl --negate > diff1.svg
+#
+# Here's what they mean when comparing a before and after profile:
+#
+# diff1.svg: widths show the before profile, colored by what WILL happen
+# diff2.svg: widths show the after profile, colored by what DID happen
+#
+# INPUT: See stackcollapse* programs.
+#
+# OUTPUT: The full list of stacks, with two columns, one from each file.
+# If a stack wasn't present in a file, the column value is zero.
+#
+# folded_stack_trace count_from_folded1 count_from_folded2
+#
+# eg:
+#
+# funca;funcb;funcc 31 33
+# ...
+#
+# COPYRIGHT: Copyright (c) 2014 Brendan Gregg.
+#
+#  This program is free software; you can redistribute it and/or
+#  modify it under the terms of the GNU General Public License
+#  as published by the Free Software Foundation; either version 2
+#  of the License, or (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program; if not, write to the Free Software Foundation,
+#  Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+#
+#  (http://www.gnu.org/copyleft/gpl.html)
+#
+# 28-Oct-2014	Brendan Gregg	Created this.
+
+use strict;
+use Getopt::Std;
+
+# defaults
+my $normalize = 0;	# make sample counts equal
+my $striphex = 0;	# strip hex numbers
+
+sub usage {
+	print STDERR <<USAGE_END;
+USAGE: $0 [-hns] folded1 folded2 | flamegraph.pl > diff2.svg
+	    -h       # help message
+	    -n       # normalize sample counts
+	    -s       # strip hex numbers (addresses)
+See stackcollapse scripts for generating folded files.
+Also consider flipping the files and hues to highlight reduced paths:
+$0 folded2 folded1 | ./flamegraph.pl --negate > diff1.svg
+USAGE_END
+	exit 2;
+}
+
+usage() if @ARGV < 2;
+our($opt_h, $opt_n, $opt_s);
+getopts('ns') or usage();
+usage() if $opt_h;
+$normalize = 1 if defined $opt_n;
+$striphex = 1 if defined $opt_s;
+
+my ($total1, $total2) = (0, 0);
+my %Folded;
+
+my $file1 = $ARGV[0];
+my $file2 = $ARGV[1];
+
+open FILE, $file1 or die "ERROR: Can't read $file1\n";
+while (<FILE>) {
+	chomp;
+	my ($stack, $count) = (/^(.*)\s+?(\d+(?:\.\d*)?)$/);
+	$stack =~ s/0x[0-9a-fA-F]+/0x.../g if $striphex;
+	$Folded{$stack}{1} += $count;
+	$total1 += $count;
+}
+close FILE;
+
+open FILE, $file2 or die "ERROR: Can't read $file2\n";
+while (<FILE>) {
+	chomp;
+	my ($stack, $count) = (/^(.*)\s+?(\d+(?:\.\d*)?)$/);
+	$stack =~ s/0x[0-9a-fA-F]+/0x.../g if $striphex;
+	$Folded{$stack}{2} += $count;
+	$total2 += $count;
+}
+close FILE;
+
+foreach my $stack (keys %Folded) {
+	$Folded{$stack}{1} = 0 unless defined $Folded{$stack}{1};
+	$Folded{$stack}{2} = 0 unless defined $Folded{$stack}{2};
+	if ($normalize && $total1 != $total2) {
+		$Folded{$stack}{1} = int($Folded{$stack}{1} * $total2 / $total1);
+	}
+	print "$stack $Folded{$stack}{1} $Folded{$stack}{2}\n";
+}
--- a/tests/benchmarks/_script/flamegraph/example-dtrace-stacks.txt
+++ b/tests/benchmarks/_script/flamegraph/example-dtrace-stacks.txt
--- a/tests/benchmarks/_script/flamegraph/example-dtrace.svg
+++ b/tests/benchmarks/_script/flamegraph/example-dtrace.svg
--- a/tests/benchmarks/_script/flamegraph/example-perf-stacks.txt.gz
+++ b/tests/benchmarks/_script/flamegraph/example-perf-stacks.txt.gz
--- a/tests/benchmarks/_script/flamegraph/example-perf.svg
+++ b/tests/benchmarks/_script/flamegraph/example-perf.svg
--- a/tests/benchmarks/_script/flamegraph/files.pl
+++ b/tests/benchmarks/_script/flamegraph/files.pl
@@ -0,0 +1,62 @@
+#!/usr/bin/perl -w
+#
+# files.pl	Print file sizes in folded format, for a flame graph.
+#
+# This helps you understand storage consumed by a file system, by creating
+# a flame graph visualization of space consumed. This is basically a Perl
+# version of the "find" command, which emits in folded format for piping
+# into flamegraph.pl.
+#
+# Copyright (c) 2017 Brendan Gregg.
+# Licensed under the Apache License, Version 2.0 (the "License")
+#
+# 03-Feb-2017   Brendan Gregg   Created this.
+
+use strict;
+use File::Find;
+
+sub usage {
+	print STDERR "USAGE: $0 [--xdev] [DIRECTORY]...\n";
+	print STDERR "   eg, $0 /Users\n";
+	print STDERR "   To not descend directories on other filesystems:";
+	print STDERR "   eg, $0 --xdev /\n";
+	print STDERR "Intended to be piped to flamegraph.pl. Full example:\n";
+	print STDERR "   $0 /Users | flamegraph.pl " .
+	    "--hash --countname=bytes > files.svg\n";
+	print STDERR "   $0 /usr /home /root /etc | flamegraph.pl " .
+	    "--hash --countname=bytes > files.svg\n";
+	print STDERR "   $0 --xdev / | flamegraph.pl " .
+	    "--hash --countname=bytes > files.svg\n";
+	exit 1;
+}
+
+usage() if @ARGV == 0 or $ARGV[0] eq "--help" or $ARGV[0] eq "-h";
+
+my $filter_xdev = 0;
+my $xdev_id;
+
+foreach my $dir (@ARGV) {
+	if ($dir eq "--xdev") {
+	    $filter_xdev = 1;
+	} else {
+	    find(\&wanted, $dir);
+	}
+}
+
+sub wanted {
+	my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size) = lstat($_);
+	return unless defined $size;
+	if ($filter_xdev) {
+		if (!$xdev_id) {
+			$xdev_id = $dev;
+		} elsif ($xdev_id ne $dev) {
+			$File::Find::prune = 1;
+			return;
+		}
+	}
+	my $path = $File::Find::name;
+	$path =~ tr/\//;/;		# delimiter
+	$path =~ tr/;.a-zA-Z0-9-/_/c;	# ditch whitespace and other chars
+	$path =~ s/^;//;
+	print "$path $size\n";
+}
--- a/tests/benchmarks/_script/flamegraph/flamegraph.pl
+++ b/tests/benchmarks/_script/flamegraph/flamegraph.pl
--- a/tests/benchmarks/_script/flamegraph/jmaps
+++ b/tests/benchmarks/_script/flamegraph/jmaps
@@ -0,0 +1,104 @@
+#!/bin/bash
+#
+# jmaps - creates java /tmp/perf-PID.map symbol maps for all java processes.
+#
+# This is a helper script that finds all running "java" processes, then executes
+# perf-map-agent on them all, creating symbol map files in /tmp. These map files
+# are read by perf_events (aka "perf") when doing system profiles (specifically,
+# the "report" and "script" subcommands).
+#
+# USAGE: jmaps [-u]
+#		-u	# unfoldall: include inlined symbols
+#
+# My typical workflow is this:
+#
+# perf record -F 99 -a -g -- sleep 30; jmaps
+# perf script > out.stacks
+# ./stackcollapse-perf.pl out.stacks | ./flamegraph.pl --color=java --hash > out.stacks.svg
+#
+# The stackcollapse-perf.pl and flamegraph.pl programs come from:
+# https://github.com/brendangregg/FlameGraph
+#
+# REQUIREMENTS:
+# Tune two environment settings below.
+#
+# 13-Feb-2015	Brendan Gregg	Created this.
+# 20-Feb-2017      "      "     Added -u for unfoldall.
+
+JAVA_HOME=${JAVA_HOME:-/usr/lib/jvm/java-8-oracle}
+AGENT_HOME=${AGENT_HOME:-/usr/lib/jvm/perf-map-agent}  # from https://github.com/jvm-profiling-tools/perf-map-agent
+debug=0
+
+if [[ "$USER" != root ]]; then
+	echo "ERROR: not root user? exiting..."
+	exit
+fi
+
+if [[ ! -x $JAVA_HOME ]]; then
+	echo "ERROR: JAVA_HOME not set correctly; edit $0 and fix"
+	exit
+fi
+
+if [[ ! -x $AGENT_HOME ]]; then
+	echo "ERROR: AGENT_HOME not set correctly; edit $0 and fix"
+	exit
+fi
+
+if [[ "$1" == "-u" ]]; then
+	opts=unfoldall
+fi
+
+# figure out where the agent files are:
+AGENT_OUT=""
+AGENT_JAR=""
+if [[ -e $AGENT_HOME/out/attach-main.jar ]]; then
+	AGENT_JAR=$AGENT_HOME/out/attach-main.jar
+elif [[ -e $AGENT_HOME/attach-main.jar ]]; then
+	AGENT_JAR=$AGENT_HOME/attach-main.jar
+fi
+if [[ -e $AGENT_HOME/out/libperfmap.so ]]; then
+	AGENT_OUT=$AGENT_HOME/out
+elif [[ -e $AGENT_HOME/libperfmap.so ]]; then
+	AGENT_OUT=$AGENT_HOME
+fi
+if [[ "$AGENT_OUT" == "" || "$AGENT_JAR" == "" ]]; then
+	echo "ERROR: Missing perf-map-agent files in $AGENT_HOME. Check installation."
+	exit
+fi
+
+# Fetch map for all "java" processes
+echo "Fetching maps for all java processes..."
+for pid in $(pgrep -x java); do
+	mapfile=/tmp/perf-$pid.map
+	[[ -e $mapfile ]] && rm $mapfile
+
+	cmd="cd $AGENT_OUT; $JAVA_HOME/bin/java -Xms32m -Xmx128m -cp $AGENT_JAR:$JAVA_HOME/lib/tools.jar net.virtualvoid.perf.AttachOnce $pid $opts"
+	(( debug )) && echo $cmd
+
+	user=$(ps ho user -p $pid)
+	group=$(ps ho group -p $pid)
+	if [[ "$user" != root ]]; then
+		if [[ "$user" == [0-9]* ]]; then
+			# UID only, likely GID too, run sudo with #UID:
+			cmd="sudo -u '#'$user -g '#'$group sh -c '$cmd'"
+		else
+			cmd="sudo -u $user -g $group sh -c '$cmd'"
+		fi
+	fi
+
+	echo "Mapping PID $pid (user $user):"
+	if (( debug )); then
+		time eval $cmd
+	else
+		eval $cmd
+	fi
+	if [[ -e "$mapfile" ]]; then
+		chown root $mapfile
+		chmod 666 $mapfile
+	else
+		echo "ERROR: $mapfile not created."
+	fi
+
+	echo "wc(1): $(wc $mapfile)"
+	echo
+done
--- a/tests/benchmarks/_script/flamegraph/pkgsplit-perf.pl
+++ b/tests/benchmarks/_script/flamegraph/pkgsplit-perf.pl
@@ -0,0 +1,86 @@
+#!/usr/bin/perl -w
+#
+# pkgsplit-perf.pl	Split IP samples on package names "/", eg, Java.
+#
+# This is for the creation of Java package flame graphs. Example steps:
+#
+# perf record -F 199 -a -- sleep 30; ./jmaps
+# perf script | ./pkgsplit-perf.pl | ./flamegraph.pl > out.svg
+#
+# Note that stack traces are not sampled (no -g), as we split Java package
+# names into frames rather than stack frames.
+#
+# (jmaps is a helper script for automating perf-map-agent: Java symbol dumps.)
+#
+# The default output of "perf script" varies between kernel versions, so we'll
+# need to deal with that here. I could make people use the perf script option
+# to pick fields, so our input is static, but A) I prefer the simplicity of
+# just saying: run "perf script", and B) the option to choose fields itself
+# changed between kernel versions! -f became -F.
+#
+# Copyright 2017 Netflix, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License")
+#
+# 20-Sep-2016	Brendan Gregg	Created this.
+
+use strict;
+
+my $include_pname = 1;	# include process names in stacks
+my $include_pid = 0;	# include process ID with process name
+my $include_tid = 0;	# include process & thread ID with process name
+
+while (<>) {
+	# filter comments
+	next if /^#/;
+
+	# filter idle events
+	next if /xen_hypercall_sched_op|cpu_idle|native_safe_halt/;
+
+	my ($pid, $tid, $pname);
+
+	# Linux 3.13:
+	#     java 13905 [000]  8048.096572: cpu-clock:      7fd781ac3053 Ljava/util/Arrays$ArrayList;::toArray (/tmp/perf-12149.map)
+	#     java  8301 [050] 13527.392454: cycles:      7fa8a80d9bff Dictionary::find(int, unsigned int, Symbol*, ClassLoaderData*, Handle, Thread*) (/usr/lib/jvm/java-8-oracle-1.8.0.121/jre/lib/amd64/server/libjvm.so)
+	#     java  4567/8603  [023] 13527.389886: cycles:      7fa863349895 Lcom/google/gson/JsonObject;::add (/tmp/perf-4567.map)
+	#
+	# Linux 4.8:
+	#     java 30894 [007] 452884.077440:   10101010 cpu-clock:      7f0acc8eff67 Lsun/nio/ch/SocketChannelImpl;::read+0x27 (/tmp/perf-30849.map)
+	#     bash 26858/26858 [006] 5440237.995639: cpu-clock:            433573 [unknown] (/bin/bash)
+	#
+	if (/^\s+(\S.+?)\s+(\d+)\/*(\d+)*\s.*?:.*:/) {
+		# parse process name and pid/tid
+		if ($3) {
+			($pid, $tid) = ($2, $3);
+		} else {
+			($pid, $tid) = ("?", $2);
+		}
+
+		if ($include_tid) {
+			$pname = "$1-$pid/$tid";
+		} elsif ($include_pid) {
+			$pname = "$1-$pid";
+		} else {
+			$pname = $1;
+		}
+		$pname =~ tr/ /_/;
+	} else {
+		# not a match
+		next;
+	}
+
+	# parse rest of line
+	s/^.*?:.*?:\s+//;
+	s/ \(.*?\)$//;
+	chomp;
+	my ($addr, $func) = split(' ', $_, 2);
+
+	# strip Java's leading "L"
+	$func =~ s/^L//;
+
+	# replace numbers with X
+	$func =~ s/[0-9]/X/g;
+
+	# colon delimitered
+	$func =~ s:/:;:g;
+	print "$pname;$func 1\n";
+}
--- a/tests/benchmarks/_script/flamegraph/range-perf.pl
+++ b/tests/benchmarks/_script/flamegraph/range-perf.pl
@@ -0,0 +1,137 @@
+#!/usr/bin/perl -w
+#
+# range-perf	Extract a time range from Linux "perf script" output.
+#
+# USAGE EXAMPLE:
+#
+# perf record -F 100 -a -- sleep 60
+# perf script | ./perf2range.pl 10 20	# range 10 to 20 seconds only
+# perf script | ./perf2range.pl 0 0.5	# first half second only
+#
+# MAKING A SERIES OF FLAME GRAPHS:
+#
+# Let's say you had the output of "perf script" in a file, out.stacks01, which
+# was for a 180 second profile. The following command creates a series of
+# flame graphs for each 10 second interval:
+# 
+# for i in `seq 0 10 170`; do cat out.stacks01 | \
+#    ./perf2range.pl $i $((i + 10)) | ./stackcollapse-perf.pl | \
+#    grep -v cpu_idle | ./flamegraph.pl --hash --color=java \
+#    --title="range $i $((i + 10))" > out.range_$i.svg; echo $i done; done
+#
+# In that example, I used "--color=java" for the Java palette, and excluded
+# the idle CPU task. Customize as needed.
+#
+# Copyright 2017 Netflix, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License")
+#
+# 21-Feb-2017	Brendan Gregg	Created this.
+
+use strict;
+use Getopt::Long;
+use POSIX 'floor';
+
+sub usage {
+	die <<USAGE_END;
+USAGE: $0 [options] min_seconds max_seconds
+	--timeraw	# use raw timestamps from perf
+	--timezerosecs	# time starts at 0 secs, but keep offset from perf
+	eg,
+	$0 10 20	# only include samples between 10 and 20 seconds
+USAGE_END
+}
+
+my $timeraw = 0;
+my $timezerosecs = 0;
+GetOptions(
+	'timeraw'       => \$timeraw,
+	'timezerosecs'  => \$timezerosecs,
+) or usage();
+
+if (@ARGV < 2 || $ARGV[0] eq "-h" || $ARGV[0] eq "--help") {
+	usage();
+	exit;
+}
+my $begin = $ARGV[0];
+my $end = $ARGV[1];
+
+#
+# Parsing
+#
+# IP only examples:
+# 
+# java 52025 [026] 99161.926202: cycles: 
+# java 14341 [016] 252732.474759: cycles:      7f36571947c0 nmethod::is_nmethod() const (/...
+# java 14514 [022] 28191.353083: cpu-clock:      7f92b4fdb7d4 Ljava_util_List$size$0;::call (/tmp/perf-11936.map)
+#      swapper     0 [002] 6035557.056977:   10101010 cpu-clock:  ffffffff810013aa xen_hypercall_sched_op+0xa (/lib/modules/4.9-virtual/build/vmlinux)
+#         bash 25370 603are 6036.991603:   10101010 cpu-clock:            4b931e [unknown] (/bin/bash)
+#         bash 25370/25370 6036036.799684: cpu-clock:            4b913b [unknown] (/bin/bash)
+# other combinations are possible.
+#
+# Stack examples (-g):
+#
+# swapper     0 [021] 28648.467059: cpu-clock: 
+#	ffffffff810013aa xen_hypercall_sched_op ([kernel.kallsyms])
+#	ffffffff8101cb2f default_idle ([kernel.kallsyms])
+#	ffffffff8101d406 arch_cpu_idle ([kernel.kallsyms])
+#	ffffffff810bf475 cpu_startup_entry ([kernel.kallsyms])
+#	ffffffff81010228 cpu_bringup_and_idle ([kernel.kallsyms])
+#
+# java 14375 [022] 28648.467079: cpu-clock: 
+#	    7f92bdd98965 Ljava/io/OutputStream;::write (/tmp/perf-11936.map)
+#	    7f8808cae7a8 [unknown] ([unknown])
+#
+# swapper     0 [005]  5076.836336: cpu-clock: 
+#	ffffffff81051586 native_safe_halt ([kernel.kallsyms])
+#	ffffffff8101db4f default_idle ([kernel.kallsyms])
+#	ffffffff8101e466 arch_cpu_idle ([kernel.kallsyms])
+#	ffffffff810c2b31 cpu_startup_entry ([kernel.kallsyms])
+#	ffffffff810427cd start_secondary ([kernel.kallsyms])
+#
+# swapper     0 [002] 6034779.719110:   10101010 cpu-clock: 
+#       2013aa xen_hypercall_sched_op+0xfe20000a (/lib/modules/4.9-virtual/build/vmlinux)
+#       a72f0e default_idle+0xfe20001e (/lib/modules/4.9-virtual/build/vmlinux)
+#       2392bf arch_cpu_idle+0xfe20000f (/lib/modules/4.9-virtual/build/vmlinux)
+#       a73333 default_idle_call+0xfe200023 (/lib/modules/4.9-virtual/build/vmlinux)
+#       2c91a4 cpu_startup_entry+0xfe2001c4 (/lib/modules/4.9-virtual/build/vmlinux)
+#       22b64a cpu_bringup_and_idle+0xfe20002a (/lib/modules/4.9-virtual/build/vmlinux)
+#
+# bash 25370/25370 6035935.188539: cpu-clock: 
+#                   b9218 [unknown] (/bin/bash)
+#                 2037fe8 [unknown] ([unknown])
+# other combinations are possible.
+#
+# This regexp matches the event line, and puts time in $1, and the event name
+# in $2:
+#
+my $event_regexp = qr/ +([0-9\.]+): *\S* *(\S+):/;
+
+my $line;
+my $start = 0;
+my $ok = 0;
+my $time;
+
+while (1) {
+	$line = <STDIN>;
+	last unless defined $line;
+	next if $line =~ /^#/;		# skip comments
+
+	if ($line =~ $event_regexp) {
+		my ($ts, $event) = ($1, $2, $3);
+		$start = $ts if $start == 0;
+
+		if ($timezerosecs) {
+			$time = $ts - floor($start);
+		} elsif (!$timeraw) {
+			$time = $ts - $start;
+		} else {
+			$time = $ts;	# raw times
+		}
+
+		$ok = 1 if $time >= $begin;
+		# assume samples are in time order:
+		exit if $time > $end;
+	}
+
+	print $line if $ok;
+}
--- a/tests/benchmarks/_script/flamegraph/record-test.sh
+++ b/tests/benchmarks/_script/flamegraph/record-test.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+#
+# record-test.sh - Overwrite flame graph test result files.
+#
+# See test.sh, which checks these resulting files.
+#
+# Currently only tests stackcollapse-perf.pl.
+
+set -v -x
+
+# ToDo: add some form of --inline, and --inline --context tests. These are
+# tricky since they use addr2line, whose output will vary based on the test
+# system's binaries and symbol tables.
+for opt in pid tid kernel jit all addrs; do
+  for testfile in test/*.txt ; do
+    echo testing $testfile : $opt
+    outfile=${testfile#*/}
+    outfile=test/results/${outfile%.txt}"-collapsed-${opt}.txt"
+    ./stackcollapse-perf.pl --"${opt}" "${testfile}" 2> /dev/null > $outfile
+  done
+done
--- a/tests/benchmarks/_script/flamegraph/stackcollapse-aix.pl
+++ b/tests/benchmarks/_script/flamegraph/stackcollapse-aix.pl
@@ -0,0 +1,61 @@
+#!/usr/bin/perl -ws
+#
+# stackcollapse-aix  Collapse AIX /usr/bin/procstack backtraces
+#
+# Parse a list of backtraces as generated with the poor man's aix-perf.pl
+# profiler 
+#
+
+use strict;
+
+my $process = "";
+my $current = "";
+my $previous_function = "";
+
+my %stacks;
+
+while(<>) {
+  chomp;
+  if (m/^\d+:/) {
+    if(!($current eq "")) {
+      $current = $process . ";" . $current;
+      $stacks{$current} += 1;
+      $current = "";
+    }
+    m/^\d+: ([^ ]*)/;
+    $process = $1;
+    $current = "";
+  }
+  elsif(m/^---------- tid# \d+/){
+    if(!($current eq "")) {
+      $current = $process . ";" . $current;
+      $stacks{$current} += 1;
+    }
+    $current = "";
+  }
+  elsif(m/^(0x[0-9abcdef]*) *([^ ]*) ([^ ]*) ([^ ]*)/) {
+    my $function = $2;
+    my $alt = $1;
+    $function=~s/\(.*\)?//;
+    if($function =~ /^\[.*\]$/) {
+      $function = $alt;
+    }
+    if ($current) {
+      $current = $function . ";" . $current;
+    }
+    else {
+      $current = $function;
+    }
+  }
+}
+
+if(!($current eq "")) {
+  $current = $process . ";" . $current;
+  $stacks{$current} += 1;
+  $current = "";
+  $process = "";
+}
+
+foreach my $k (sort { $a cmp $b } keys %stacks) {
+  print "$k $stacks{$k}\n";
+}
--- a/tests/benchmarks/_script/flamegraph/stackcollapse-bpftrace.pl
+++ b/tests/benchmarks/_script/flamegraph/stackcollapse-bpftrace.pl
@@ -0,0 +1,72 @@
+#!/usr/bin/perl -w
+#
+# stackcollapse-bpftrace.pl	collapse bpftrace samples into single lines.
+#
+# USAGE ./stackcollapse-bpftrace.pl infile > outfile
+#
+# Example input:
+#
+# @[
+# _raw_spin_lock_bh+0
+# tcp_recvmsg+808
+# inet_recvmsg+81
+# sock_recvmsg+67
+# sock_read_iter+144
+# new_sync_read+228
+# __vfs_read+41
+# vfs_read+142
+# sys_read+85
+# do_syscall_64+115
+# entry_SYSCALL_64_after_hwframe+61
+# ]: 3
+#
+# Example output:
+#
+# entry_SYSCALL_64_after_hwframe+61;do_syscall_64+115;sys_read+85;vfs_read+142;__vfs_read+41;new_sync_read+228;sock_read_iter+144;sock_recvmsg+67;inet_recvmsg+81;tcp_recvmsg+808;_raw_spin_lock_bh+0 3
+#
+# Copyright 2018 Peter Sanford.  All rights reserved.
+#
+#  This program is free software; you can redistribute it and/or
+#  modify it under the terms of the GNU General Public License
+#  as published by the Free Software Foundation; either version 2
+#  of the License, or (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program; if not, write to the Free Software Foundation,
+#  Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+#
+#  (http://www.gnu.org/copyleft/gpl.html)
+#
+
+use strict;
+
+my @stack;
+my $in_stack = 0;
+
+foreach (<>) {
+  chomp;
+  if (!$in_stack) {
+    if (/^@\[$/) {
+      $in_stack = 1;
+    } elsif (/^@\[,\s(.*)\]: (\d+)/) {
+      print $1 . " $2\n";
+    }
+  } else {
+    if (m/^,?\s?(.*)\]: (\d+)/) {
+      if (length $1) {
+        push(@stack, $1);
+      }
+      print join(';', reverse(@stack)) . " $2\n";
+      $in_stack = 0;
+      @stack = ();
+    } else {
+      $_ =~ s/^\s+//;
+      push(@stack, $_);
+    }
+  }
+}
--- a/tests/benchmarks/_script/flamegraph/stackcollapse-chrome-tracing.py
+++ b/tests/benchmarks/_script/flamegraph/stackcollapse-chrome-tracing.py
@@ -0,0 +1,144 @@
+#!/usr/bin/python
+#
+# stackcolllapse-chrome-tracing.py	collapse Trace Event Format [1]
+#             callstack events into single lines.
+#
+# [1] https://github.com/catapult-project/catapult/wiki/Trace-Event-Format
+#
+# USAGE: ./stackcollapse-chrome-tracing.py input_json [input_json...] > outfile
+#
+# Example input:
+#
+# {"traceEvents":[
+#     {"pid":1,"tid":2,"ts":0,"ph":"X","name":"Foo","dur":50},
+#     {"pid":1,"tid":2,"ts":10,"ph":"X","name":"Bar","dur":30}
+# ]}
+#
+# Example output:
+#
+#  Foo 20.0
+#  Foo;Bar 30.0
+#
+# Input may contain many stack trace events from many processes/threads.
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at docs/cddl1.txt or
+# http://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at docs/cddl1.txt.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# 4-Jan-2018	Marcin Kolny	Created this.
+import argparse
+import json
+
+stack_identifiers = {}
+
+
+class Event:
+    def __init__(self, label, timestamp, dur):
+        self.label = label
+        self.timestamp = timestamp
+        self.duration = dur
+        self.total_duration = dur
+
+    def get_stop_timestamp(self):
+        return self.timestamp + self.duration
+
+
+def cantor_pairing(a, b):
+    s = a + b
+    return s * (s + 1) / 2 + b
+
+
+def get_trace_events(trace_file, events_dict):
+    json_data = json.load(trace_file)
+
+    for entry in json_data['traceEvents']:
+        if entry['ph'] == 'X':
+            cantor_val = cantor_pairing(int(entry['tid']), int(entry['pid']))
+            if 'dur' not in entry:
+                continue
+            if cantor_val not in events_dict:
+                events_dict[cantor_val] = []
+            events_dict[cantor_val].append(Event(entry['name'], float(entry['ts']), float(entry['dur'])))
+
+
+def load_events(trace_files):
+    events = {}
+
+    for trace_file in trace_files:
+        get_trace_events(trace_file, events)
+
+    for key in events:
+        events[key].sort(key=lambda x: x.timestamp)
+
+    return events
+
+
+def save_stack(stack):
+    first = True
+    event = None
+    identifier = ''
+
+    for event in stack:
+        if first:
+            first = False
+        else:
+            identifier += ';'
+        identifier += event.label
+
+    if not event:
+        return
+
+    if identifier in stack_identifiers:
+        stack_identifiers[identifier] += event.total_duration
+    else:
+        stack_identifiers[identifier] = event.total_duration
+
+
+def load_stack_identifiers(events):
+    event_stack = []
+
+    for e in events:
+        if not event_stack:
+            event_stack.append(e)
+        else:
+            while event_stack and event_stack[-1].get_stop_timestamp() <= e.timestamp:
+                save_stack(event_stack)
+                event_stack.pop()
+
+            if event_stack:
+                event_stack[-1].total_duration -= e.duration
+
+            event_stack.append(e)
+
+    while event_stack:
+        save_stack(event_stack)
+        event_stack.pop()
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument('input_file', nargs='+',
+                    type=argparse.FileType('r'),
+                    help='Chrome Tracing input files')
+args = parser.parse_args()
+
+all_events = load_events(args.input_file)
+for tid_pid in all_events:
+    load_stack_identifiers(all_events[tid_pid])
+
+for identifiers, duration in stack_identifiers.items():
+    print(identifiers + ' ' + str(duration))
--- a/tests/benchmarks/_script/flamegraph/stackcollapse-elfutils.pl
+++ b/tests/benchmarks/_script/flamegraph/stackcollapse-elfutils.pl
@@ -0,0 +1,98 @@
+#!/usr/bin/perl -w
+#
+# stackcollapse-elfutils  Collapse elfutils stack (eu-stack) backtraces
+#
+# Parse a list of elfutils backtraces as generated with the poor man's
+# profiler [1]:
+#
+#   for x in $(seq 1 "$nsamples"); do
+#      eu-stack -p "$pid" "$@"
+#      sleep "$sleeptime"
+#   done
+#
+# [1] http://poormansprofiler.org/
+#
+# Copyright 2014 Gabriel Corona. All rights reserved.
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at docs/cddl1.txt or
+# http://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at docs/cddl1.txt.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+
+use strict;
+use Getopt::Long;
+
+my $with_pid = 0;
+my $with_tid = 0;
+
+GetOptions('pid' => \$with_pid,
+           'tid' => \$with_tid)
+or die <<USAGE_END;
+USAGE: $0 [options] infile > outfile\n
+        --pid           # include PID
+        --tid           # include TID
+USAGE_END
+
+my $pid = "";
+my $tid = "";
+my $current = "";
+my $previous_function = "";
+
+my %stacks;
+
+sub add_current {
+  if(!($current eq "")) {
+    my $entry;
+    if ($with_tid) {
+      $current = "TID=$tid;$current";
+    }
+    if ($with_pid) {
+      $current = "PID=$pid;$current";
+    }
+    $stacks{$current} += 1;
+    $current = "";
+  }
+}
+
+while(<>) {
+  chomp;
+  if (m/^PID ([0-9]*)/) {
+    add_current();
+    $pid = $1;
+  }
+  elsif(m/^TID ([0-9]*)/) {
+    add_current();
+    $tid = $1;
+  } elsif(m/^#[0-9]* *0x[0-9a-f]* (.*)/) {
+    if ($current eq "") {
+      $current = $1;
+    } else {
+      $current = "$1;$current";
+    }
+  } elsif(m/^#[0-9]* *0x[0-9a-f]*/) {
+    if ($current eq "") {
+      $current = "[unknown]";
+    } else {
+      $current = "[unknown];$current";
+    }
+  }
+}
+add_current();
+
+foreach my $k (sort { $a cmp $b } keys %stacks) {
+  print "$k $stacks{$k}\n";
+}
--- a/tests/benchmarks/_script/flamegraph/stackcollapse-faulthandler.pl
+++ b/tests/benchmarks/_script/flamegraph/stackcollapse-faulthandler.pl
@@ -0,0 +1,61 @@
+#!/usr/bin/perl -ws
+#
+# stackcollapse-faulthandler  Collapse Python faulthandler backtraces
+#
+# Parse a list of Python faulthandler backtraces as generated with
+# faulthandler.dump_traceback_later.
+#
+# Copyright 2014 Gabriel Corona. All rights reserved.
+# Copyright 2017 Jonathan Kolb. All rights reserved.
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at docs/cddl1.txt or
+# http://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at docs/cddl1.txt.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+
+use strict;
+
+my $current = "";
+
+my %stacks;
+
+while(<>) {
+  chomp;
+  if (m/^Thread/) {
+    $current=""
+  }
+  elsif(m/^  File "([^"]*)", line ([0-9]*) in (.*)/) {
+    my $function = $1 . ":" . $2 . ":" . $3;
+    if ($current eq "") {
+      $current = $function;
+    } else {
+      $current = $function . ";" . $current;
+    }
+  } elsif(!($current eq "")) {
+    $stacks{$current} += 1;
+    $current = "";
+  }
+}
+
+if(!($current eq "")) {
+  $stacks{$current} += 1;
+  $current = "";
+}
+
+foreach my $k (sort { $a cmp $b } keys %stacks) {
+  print "$k $stacks{$k}\n";
+}
--- a/tests/benchmarks/_script/flamegraph/stackcollapse-gdb.pl
+++ b/tests/benchmarks/_script/flamegraph/stackcollapse-gdb.pl
@@ -0,0 +1,72 @@
+#!/usr/bin/perl -ws
+#
+# stackcollapse-gdb  Collapse GDB backtraces
+#
+# Parse a list of GDB backtraces as generated with the poor man's
+# profiler [1]:
+#
+#   for x in $(seq 1 500); do
+#      gdb -ex "set pagination 0" -ex "thread apply all bt" -batch -p $pid 2> /dev/null
+#      sleep 0.01
+#    done
+#
+# [1] http://poormansprofiler.org/
+#
+# Copyright 2014 Gabriel Corona. All rights reserved.
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at docs/cddl1.txt or
+# http://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at docs/cddl1.txt.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+
+use strict;
+
+my $current = "";
+my $previous_function = "";
+
+my %stacks;
+
+while(<>) {
+  chomp;
+  if (m/^Thread/) {
+    $current=""
+  }
+  elsif(m/^#[0-9]* *([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*)/) {
+    my $function = $3;
+    my $alt = $1;
+    if(not($1 =~ /0x[a-zA-Z0-9]*/)) {
+      $function = $alt;
+    }
+    if ($current eq "") {
+      $current = $function;
+    } else {
+      $current = $function . ";" . $current;
+    }
+  } elsif(!($current eq "")) {
+    $stacks{$current} += 1;
+    $current = "";
+  }
+}
+
+if(!($current eq "")) {
+  $stacks{$current} += 1;
+  $current = "";
+}
+
+foreach my $k (sort { $a cmp $b } keys %stacks) {
+  print "$k $stacks{$k}\n";
+}
--- a/tests/benchmarks/_script/flamegraph/stackcollapse-go.pl
+++ b/tests/benchmarks/_script/flamegraph/stackcollapse-go.pl
@@ -0,0 +1,150 @@
+#!/usr/bin/perl -w
+#
+# stackcollapse-go.pl  collapse golang samples into single lines.
+#
+# Parses golang smaples generated by "go tool pprof" and outputs stacks as
+# single lines, with methods separated by semicolons, and then a space and an
+# occurrence count. For use with flamegraph.pl.
+#
+# USAGE: ./stackcollapse-go.pl infile > outfile
+#
+# Example Input:
+#   ...
+#   Samples:
+#   samples/count cpu/nanoseconds
+#        1   10000000: 1 2 
+#        2   10000000: 3 2 
+#        1   10000000: 4 2 
+#        ...
+#   Locations
+#        1: 0x58b265 scanblock :0 s=0
+#        2: 0x599530 GC :0 s=0
+#        3: 0x58a999 flushptrbuf :0 s=0
+#        4: 0x58d6a8 runtime.MSpan_Sweep :0 s=0
+#        ...
+#   Mappings
+#        ...
+#
+# Example Output:
+# 
+#   GC;flushptrbuf 2
+#   GC;runtime.MSpan_Sweep 1
+#   GC;scanblock 1
+#
+# Input may contain many stacks as generated from go tool pprof:
+#
+#   go tool pprof -seconds=60 -raw -output=a.pprof http://$ADDR/debug/pprof/profile
+#
+# For format of text profile, See golang/src/internal/pprof/profile/profile.go
+#
+# Copyright 2017 Sijie Yang (yangsijie@baidu.com).  All rights reserved.
+#
+#  This program is free software; you can redistribute it and/or
+#  modify it under the terms of the GNU General Public License
+#  as published by the Free Software Foundation; either version 2
+#  of the License, or (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program; if not, write to the Free Software Foundation,
+#  Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+#
+#  (http://www.gnu.org/copyleft/gpl.html)
+#
+# 16-Jan-2017   Sijie Yang   Created this.
+
+use strict;
+
+use Getopt::Long;
+
+# tunables
+my $help = 0;
+
+sub usage {
+	die <<USAGE_END;
+USAGE: $0 infile > outfile\n
+USAGE_END
+}
+
+GetOptions(
+	'help' => \$help,
+) or usage();
+$help && usage();
+
+# internals
+my $state = "ignore";
+my %stacks;
+my %frames;
+my %collapsed;
+
+sub remember_stack {
+	my ($stack, $count) = @_;
+	$stacks{$stack} += $count;
+}
+
+#
+# Output stack string in required format. For example, for the following samples,
+# format_statck() would return GC;runtime.MSpan_Sweep for stack "4 2"
+#
+#   Locations
+#        1: 0x58b265 scanblock :0 s=0
+#        2: 0x599530 GC :0 s=0
+#        3: 0x58a999 flushptrbuf :0 s=0
+#        4: 0x58d6a8 runtime.MSpan_Sweep :0 s=0
+#
+sub format_statck {
+	my ($stack) = @_;
+	my @loc_list = split(/ /, $stack);
+
+	for (my $i=0; $i<=$#loc_list; $i++) {
+		my $loc_name = $frames{$loc_list[$i]};
+		$loc_list[$i] = $loc_name if ($loc_name);
+	}
+	return join(";", reverse(@loc_list));
+}
+
+foreach (<>) {
+	next if m/^#/;
+	chomp;
+
+	if ($state eq "ignore") {
+		if (/Samples:/) {
+		$state = "sample";
+			next;
+		}
+
+	} elsif ($state eq "sample") {
+		if (/^\s*([0-9]+)\s*[0-9]+: ([0-9 ]+)/) {
+			my $samples = $1;
+			my $stack = $2;
+			remember_stack($stack, $samples);
+		} elsif (/Locations/) {
+			$state = "location";
+			next;
+		}
+
+	} elsif ($state eq "location") {
+		if (/^\s*([0-9]*): 0x[0-9a-f]+ (M=[0-9]+ )?([^ ]+) .*/) {
+			my $loc_id = $1;
+			my $loc_name = $3;
+			$frames{$loc_id} = $loc_name;
+		} elsif (/Mappings/) {
+			$state = "mapping";
+			last;
+		}
+	}
+}
+
+foreach my $k (keys %stacks) {
+	my $stack = format_statck($k);
+	my $count = $stacks{$k};
+	$collapsed{$stack} += $count;
+}
+
+foreach my $k (sort { $a cmp $b } keys %collapsed) {
+	print "$k $collapsed{$k}\n";
+}
--- a/tests/benchmarks/_script/flamegraph/stackcollapse-ibmjava.pl
+++ b/tests/benchmarks/_script/flamegraph/stackcollapse-ibmjava.pl
@@ -0,0 +1,145 @@
+#!/usr/bin/perl -w
+#
+# stackcollapse-ibmjava.pl	collapse jstack samples into single lines.
+#
+# Parses Java stacks generated by IBM Java with methods separated by semicolons, 
+# and then a space and an occurrence count.
+#
+# USAGE: ./stackcollapse-ibmjava.pl infile > outfile
+#
+# Example input:
+#
+#  NULL           
+#  1XMTHDINFO     Thread Details
+#  NULL           
+#  NULL
+#  3XMTHREADINFO      "Default Executor-thread-149164" J9VMThread:0x0000000008132B00, j9thread_t:0x000000001A810B90, java/lang/Thread:0x0000000712BE8E48, state:R, prio=5
+#  3XMJAVALTHREAD            (java/lang/Thread getId:0x3493E, isDaemon:true)
+#  3XMTHREADINFO1            (native thread ID:0x3158, native priority:0x5, native policy:UNKNOWN, vmstate:R, vm thread flags:0x00000001)
+#  3XMCPUTIME               CPU usage total: 0.421875000 secs, user: 0.343750000 secs, system: 0.078125000 secs, current category="Application"
+#  3XMHEAPALLOC             Heap bytes allocated since last GC cycle=0 (0x0)
+#  3XMTHREADINFO3           Java callstack:
+#  4XESTACKTRACE                at java/net/SocketInputStream.socketRead0(Native Method)
+#  4XESTACKTRACE                at java/net/SocketInputStream.socketRead(SocketInputStream.java:127(Compiled Code))
+#  4XESTACKTRACE                at java/net/SocketInputStream.read(SocketInputStream.java:182(Compiled Code))
+#  4XESTACKTRACE                at java/net/SocketInputStream.read(SocketInputStream.java:152(Compiled Code))
+#  4XESTACKTRACE                at java/io/FilterInputStream.read(FilterInputStream.java:144(Compiled Code))
+#  ...
+#  4XESTACKTRACE                at java/lang/Thread.run(Thread.java:785(Compiled Code))
+#
+# Example output:
+#
+#  Default Executor-thread-149164;java/lang/Thread.run;java/net/SocketInputStream/read;java/net/SocketInputStream.socketRead0 1
+#
+#
+# Copyright 2014 Federico Juinio.  All rights reserved.
+#
+#  This program is free software; you can redistribute it and/or
+#  modify it under the terms of the GNU General Public License
+#  as published by the Free Software Foundation; either version 2
+#  of the License, or (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program; if not, write to the Free Software Foundation,
+#  Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+#
+#  (http://www.gnu.org/copyleft/gpl.html)
+#
+# 23-Aug-2023   Federico Juinio created this based from stackcollapse-jstack.pl
+
+use strict;
+
+use Getopt::Long;
+
+# tunables
+my $include_tname = 1;		# include thread names in stacks
+my $include_tid = 0;		# include thread IDs in stacks
+my $shorten_pkgs = 0;		# shorten package names
+my $help = 0;
+
+sub usage {
+	die <<USAGE_END;
+USAGE: $0 [options] infile > outfile\n
+	--include-tname
+	--no-include-tname # include/omit thread names in stacks (default: include)
+	--include-tid
+	--no-include-tid   # include/omit thread IDs in stacks (default: omit)
+	--shorten-pkgs
+	--no-shorten-pkgs  # (don't) shorten package names (default: don't shorten)
+
+	eg,
+	$0 --no-include-tname stacks.txt > collapsed.txt
+USAGE_END
+}
+
+GetOptions(
+	'include-tname!'  => \$include_tname,
+	'include-tid!'    => \$include_tid,
+	'shorten-pkgs!'   => \$shorten_pkgs,
+	'help'            => \$help,
+) or usage();
+$help && usage();
+
+
+# internals
+my %collapsed;
+
+sub remember_stack {
+	my ($stack, $count) = @_;
+	$collapsed{$stack} += $count;
+}
+
+my @stack;
+my $tname;
+my $state = "?";
+
+foreach (<>) {
+	next if m/^#/;
+	chomp;
+
+	if (m/^3XMTHREADINFO3           Native callstack:/) {
+		# save stack
+		if (defined $tname) { unshift @stack, $tname; }
+		remember_stack(join(";", @stack), 1) if @stack;
+		undef @stack;
+		undef $tname;
+		$state = "?";
+		next;
+	}
+
+	# look for thread header line and parse thread name and state
+	if (/^3XMTHREADINFO      "([^"]*).* state:(.*), /) {
+		my $name = $1;
+		if ($include_tname) {
+			$tname = $name;
+		}
+		$state = $2;
+	# special handling for "Anonymous native threads"
+	} elsif (/3XMTHREADINFO      Anonymous native thread/) {
+		$tname = "Anonymous native thread";
+	# look for thread id
+	} elsif (/^3XMTHREADINFO1            \(native thread ID:([^ ]*), native priority/) {
+		if ($include_tname && $include_tid) {
+			$tname = $tname . "-" . $1
+		}
+	# collect stack frames
+	} elsif (/^4XESTACKTRACE                at ([^\(]*)/) {
+		my $func = $1;
+		if ($shorten_pkgs) {
+			my ($pkgs, $clsFunc) = ( $func =~ m/(.*\.)([^.]+\.[^.]+)$/ );
+			$pkgs =~ s/(\w)\w*/$1/g;
+			$func = $pkgs . $clsFunc;
+		}
+		unshift @stack, $func;
+
+	}
+}
+
+foreach my $k (sort { $a cmp $b } keys %collapsed) {
+	print "$k $collapsed{$k}\n";
+}
--- a/tests/benchmarks/_script/flamegraph/stackcollapse-instruments.pl
+++ b/tests/benchmarks/_script/flamegraph/stackcollapse-instruments.pl
@@ -0,0 +1,34 @@
+#!/usr/bin/perl -w
+#
+# stackcollapse-instruments.pl
+#
+# Parses a file containing a call tree as produced by XCode Instruments
+# (Edit > Deep Copy) and produces output suitable for flamegraph.pl.
+#
+# USAGE: ./stackcollapse-instruments.pl infile > outfile
+
+use strict;
+
+my @stack = ();
+
+<>;
+foreach (<>) {
+	chomp;
+	/\d+\.\d+ (?:min|s|ms)\s+\d+\.\d+%\s+(\d+(?:\.\d+)?) (min|s|ms)\t \t(\s*)(.+)/ or die;
+	my $func = $4;
+	my $depth = length ($3);
+	$stack [$depth] = $4;
+	foreach my $i (0 .. $depth - 1) {
+		print $stack [$i];
+		print ";";
+	}
+
+	my $time = 0 + $1;
+	if ($2 eq "min") {
+		$time *= 60*1000;
+	} elsif ($2 eq "s") {
+		$time *= 1000;
+	}
+
+	printf("%s %.0f\n", $func, $time);
+}
--- a/tests/benchmarks/_script/flamegraph/stackcollapse-java-exceptions.pl
+++ b/tests/benchmarks/_script/flamegraph/stackcollapse-java-exceptions.pl
@@ -0,0 +1,72 @@
+#!/usr/bin/perl -w
+#
+# stackcolllapse-java-exceptions.pl	collapse java exceptions (found in logs) into single lines.
+#
+# Parses Java error stacks found in a log file and outputs them as
+# single lines, with methods separated by semicolons, and then a space and an
+# occurrence count. Inspired by stackcollapse-jstack.pl except that it does
+# not act as a performance profiler.
+#
+# It can be useful if a Java process dumps a lot of different stacks in its logs
+# and you want to quickly identify the biggest culprits.
+#
+# USAGE: ./stackcollapse-java-exceptions.pl infile > outfile
+#
+# Copyright 2018 Paul de Verdiere. All rights reserved.
+
+use strict;
+use Getopt::Long;
+
+# tunables
+my $shorten_pkgs = 0;		# shorten package names
+my $no_pkgs = 0;		    # really shorten package names!!
+my $help = 0;
+
+sub usage {
+	die <<USAGE_END;
+USAGE: $0 [options] infile > outfile\n
+	--shorten-pkgs : shorten package names
+  --no-pkgs      : suppress package names (makes SVG much more readable)
+
+USAGE_END
+}
+
+GetOptions(
+	'shorten-pkgs!'   => \$shorten_pkgs,
+	'no-pkgs!'        => \$no_pkgs,
+	'help'            => \$help,
+) or usage();
+$help && usage();
+
+my %collapsed;
+
+sub remember_stack {
+	my ($stack, $count) = @_;
+	$collapsed{$stack} += $count;
+}
+
+my @stack;
+
+foreach (<>) {
+	chomp;
+
+  if (/^\s*at ([^\(]*)/) {
+		my $func = $1;
+		if ($shorten_pkgs || $no_pkgs) {
+			my ($pkgs, $clsFunc) = ( $func =~ m/(.*\.)([^.]+\.[^.]+)$/ );
+			$pkgs =~ s/(\w)\w*/$1/g;
+      $func = $no_pkgs ? $clsFunc: $pkgs . $clsFunc;
+		}
+		unshift @stack, $func;
+	} elsif (@stack ) {
+		next if m/.*waiting on .*/;
+		remember_stack(join(";", @stack), 1) if @stack;
+		undef @stack;
+  }
+}
+
+remember_stack(join(";", @stack), 1) if @stack;
+
+foreach my $k (sort { $a cmp $b } keys %collapsed) {
+	print "$k $collapsed{$k}\n";
+}
--- a/tests/benchmarks/_script/flamegraph/stackcollapse-jstack.pl
+++ b/tests/benchmarks/_script/flamegraph/stackcollapse-jstack.pl
@@ -0,0 +1,176 @@
+#!/usr/bin/perl -w
+#
+# stackcollapse-jstack.pl	collapse jstack samples into single lines.
+#
+# Parses Java stacks generated by jstack(1) and outputs RUNNABLE stacks as
+# single lines, with methods separated by semicolons, and then a space and an
+# occurrence count. This also filters some other "RUNNABLE" states that we
+# know are probably not running, such as epollWait. For use with flamegraph.pl.
+#
+# You want this to process the output of at least 100 jstack(1)s. ie, run it
+# 100 times with a sleep interval, and append to a file. This is really a poor
+# man's Java profiler, due to the overheads of jstack(1), and how it isn't
+# capturing stacks asynchronously. For a better profiler, see:
+# http://www.brendangregg.com/blog/2014-06-12/java-flame-graphs.html
+#
+# USAGE: ./stackcollapse-jstack.pl infile > outfile
+#
+# Example input:
+#
+# "MyProg" #273 daemon prio=9 os_prio=0 tid=0x00007f273c038800 nid=0xe3c runnable [0x00007f28a30f2000]
+#    java.lang.Thread.State: RUNNABLE
+#        at java.net.SocketInputStream.socketRead0(Native Method)
+#        at java.net.SocketInputStream.read(SocketInputStream.java:121)
+#        ...
+#        at java.lang.Thread.run(Thread.java:744)
+#
+# Example output:
+#
+#  MyProg;java.lang.Thread.run;java.net.SocketInputStream.read;java.net.SocketInputStream.socketRead0 1
+#
+# Input may be created and processed using:
+#
+#  i=0; while (( i++ < 200 )); do jstack PID >> out.jstacks; sleep 10; done
+#  cat out.jstacks | ./stackcollapse-jstack.pl > out.stacks-folded
+#
+# WARNING: jstack(1) incurs overheads. Test before use, or use a real profiler.
+#
+# Copyright 2014 Brendan Gregg.  All rights reserved.
+#
+#  This program is free software; you can redistribute it and/or
+#  modify it under the terms of the GNU General Public License
+#  as published by the Free Software Foundation; either version 2
+#  of the License, or (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program; if not, write to the Free Software Foundation,
+#  Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+#
+#  (http://www.gnu.org/copyleft/gpl.html)
+#
+# 14-Sep-2014	Brendan Gregg	Created this.
+
+use strict;
+
+use Getopt::Long;
+
+# tunables
+my $include_tname = 1;		# include thread names in stacks
+my $include_tid = 0;		# include thread IDs in stacks
+my $shorten_pkgs = 0;		# shorten package names
+my $help = 0;
+
+sub usage {
+	die <<USAGE_END;
+USAGE: $0 [options] infile > outfile\n
+	--include-tname
+	--no-include-tname # include/omit thread names in stacks (default: include)
+	--include-tid
+	--no-include-tid   # include/omit thread IDs in stacks (default: omit)
+	--shorten-pkgs
+	--no-shorten-pkgs  # (don't) shorten package names (default: don't shorten)
+
+	eg,
+	$0 --no-include-tname stacks.txt > collapsed.txt
+USAGE_END
+}
+
+GetOptions(
+	'include-tname!'  => \$include_tname,
+	'include-tid!'    => \$include_tid,
+	'shorten-pkgs!'   => \$shorten_pkgs,
+	'help'            => \$help,
+) or usage();
+$help && usage();
+
+
+# internals
+my %collapsed;
+
+sub remember_stack {
+	my ($stack, $count) = @_;
+	$collapsed{$stack} += $count;
+}
+
+my @stack;
+my $tname;
+my $state = "?";
+
+foreach (<>) {
+	next if m/^#/;
+	chomp;
+
+	if (m/^$/) {
+		# only include RUNNABLE states
+		goto clear if $state ne "RUNNABLE";
+
+		# save stack
+		if (defined $tname) { unshift @stack, $tname; }
+		remember_stack(join(";", @stack), 1) if @stack;
+clear:
+		undef @stack;
+		undef $tname;
+		$state = "?";
+		next;
+	}
+
+	#
+	# While parsing jstack output, the $state variable may be altered from
+	# RUNNABLE to other states. This causes the stacks to be filtered later,
+	# since only RUNNABLE stacks are included.
+	#
+
+	if (/^"([^"]*)/) {
+		my $name = $1;
+
+		if ($include_tname) {
+			$tname = $name;
+			unless ($include_tid) {
+				$tname =~ s/-\d+$//;
+			}
+		}
+
+		# set state for various background threads
+		$state = "BACKGROUND" if $name =~ /C. CompilerThread/;
+		$state = "BACKGROUND" if $name =~ /Signal Dispatcher/;
+		$state = "BACKGROUND" if $name =~ /Service Thread/;
+		$state = "BACKGROUND" if $name =~ /Attach Listener/;
+
+	} elsif (/java.lang.Thread.State: (\S+)/) {
+		$state = $1 if $state eq "?";
+	} elsif (/^\s*at ([^\(]*)/) {
+		my $func = $1;
+		if ($shorten_pkgs) {
+			my ($pkgs, $clsFunc) = ( $func =~ m/(.*\.)([^.]+\.[^.]+)$/ );
+			$pkgs =~ s/(\w)\w*/$1/g;
+			$func = $pkgs . $clsFunc;
+		}
+		unshift @stack, $func;
+
+		# fix state for epollWait
+		$state = "WAITING" if $func =~ /epollWait/;
+		$state = "WAITING" if $func =~ /EPoll\.wait/;
+
+
+		# fix state for various networking functions
+		$state = "NETWORK" if $func =~ /socketAccept$/;
+		$state = "NETWORK" if $func =~ /Socket.*accept0$/;
+		$state = "NETWORK" if $func =~ /socketRead0$/;
+
+	} elsif (/^\s*-/ or /^2\d\d\d-/ or /^Full thread dump/ or
+		 /^JNI global references:/) {
+		# skip these info lines
+		next;
+	} else {
+		warn "Unrecognized line: $_";
+	}
+}
+
+foreach my $k (sort { $a cmp $b } keys %collapsed) {
+	print "$k $collapsed{$k}\n";
+}
--- a/tests/benchmarks/_script/flamegraph/stackcollapse-ljp.awk
+++ b/tests/benchmarks/_script/flamegraph/stackcollapse-ljp.awk
@@ -0,0 +1,74 @@
+#!/usr/bin/awk -f
+#
+# stackcollapse-ljp.awk	collapse lightweight java profile reports
+#				into single lines stacks.
+#
+# Parses a list of multiline stacks generated by:
+#
+#  https://code.google.com/p/lightweight-java-profiler
+#
+# and outputs a semicolon separated stack followed by a space and a count.
+#
+# USAGE: ./stackcollapse-ljp.pl infile > outfile
+#
+# Example input:
+#
+#  42 3  my_func_b(prog.java:455)
+#        my_func_a(prog.java:123)
+#        java.lang.Thread.run(Thread.java:744)
+#  [...]
+#
+# Example output:
+#
+#  java.lang.Thread.run;my_func_a;my_func_b 42
+#
+# The unused number is the number of frames in each stack.
+#
+# Copyright 2014 Brendan Gregg.  All rights reserved.
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at docs/cddl1.txt or
+# http://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at docs/cddl1.txt.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# 12-Jun-2014	Brendan Gregg	Created this.
+
+$1 == "Total" {
+	# We're done. Print last stack and exit.
+	print stack, count
+	exit
+}
+
+{
+	# Strip file location. Comment this out to keep.
+	gsub(/\(.*\)/, "")
+}
+
+NF == 3 {
+	# New stack begins. Print previous buffered stack.
+	if (count)
+		print stack, count
+
+	# Begin a new stack.
+	count = $1
+	stack = $3
+}
+
+NF == 1 {
+	# Build stack.
+	stack = $1 ";" stack
+}
--- a/tests/benchmarks/_script/flamegraph/stackcollapse-perf-sched.awk
+++ b/tests/benchmarks/_script/flamegraph/stackcollapse-perf-sched.awk
@@ -0,0 +1,228 @@
+#!/usr/bin/awk -f
+
+#
+# This program generates collapsed off-cpu stacks fit for use by flamegraph.pl
+# from scheduler data collected via perf_events.
+#
+# Outputs the cumulative time off cpu in us for each distinct stack observed.
+#
+# Some awk variables further control behavior:
+#
+#   record_tid          If truthy, causes all stack traces to include the
+#                       command and LWP id.
+#
+#   record_wake_stack   If truthy, stacks include the frames from the wakeup
+#                       event in addition to the sleep event.
+#                       See http://www.brendangregg.com/FlameGraphs/offcpuflamegraphs.html#Wakeup
+#
+#   recurse             If truthy, attempt to recursively identify and
+#                       visualize the full wakeup stack chain.
+#                       See http://www.brendangregg.com/FlameGraphs/offcpuflamegraphs.html#ChainGraph
+#
+#                       Note that this is only an approximation, as only the
+#                       last sleep event is recorded (e.g. if a thread slept
+#                       multiple times before waking another thread, only the
+#                       last sleep event is used). Implies record_wake_stack=1
+#
+# To set any of these variables from the command line, run via:
+#
+#    stackcollapse-perf-sched.awk -v recurse=1
+#
+# == Important warning ==
+#
+# WARNING: tracing all scheduler events is very high overhead in perf. Even
+# more alarmingly, there appear to be bugs in perf that prevent it from reliably
+# getting consistent traces (even with large trace buffers), causing it to
+# produce empty perf.data files with error messages of the form:
+#
+#   0x952790 [0x736d]: failed to process type: 3410
+#
+# This failure is not determinisitic, so re-executing perf record will
+# eventually succeed.
+#
+# == Usage ==
+#
+# First, record data via perf_events:
+#
+# sudo perf record -g -e 'sched:sched_switch' \
+#       -e 'sched:sched_stat_sleep' -e 'sched:sched_stat_blocked' \
+#       -p <pid> -o perf.data  -- sleep 1
+#
+# Then post process with this script:
+#
+# sudo perf script -f time,comm,pid,tid,event,ip,sym,dso,trace -i perf.data | \
+#       stackcollapse-perf-sched.awk -v recurse=1 | \
+#       flamegraph.pl --color=io --countname=us >out.svg
+#
+
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at docs/cddl1.txt or
+# http://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at docs/cddl1.txt.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2015 by MemSQL. All rights reserved.
+#
+
+#
+# Match a perf captured variable, returning just the contents. For example, for
+# the following line, get_perf_captured_variable("pid") would return "27235":
+#
+#     swapper     0 [006] 708189.626415: sched:sched_stat_sleep: comm=memsqld pid=27235 delay=100078421 [ns
+#
+function get_perf_captured_variable(variable)
+{
+	match($0, variable "=[^[:space:]]+")
+	return substr($0, RSTART + length(variable) + 1,
+                      RLENGTH - length(variable) - 1)
+}
+
+#
+# The timestamp is the first field that ends in a colon, e.g.:
+#
+#     swapper     0 [006] 708189.626415: sched:sched_stat_sleep: comm=memsqld pid=27235 delay=100078421 [ns
+#
+# or
+#
+#     swapper     0/0     708189.626415: sched:sched_stat_sleep: comm=memsqld pid=27235 delay=100078421 [ns]
+#
+function get_perf_timestamp()
+{
+	match($0, " [^ :]+:")
+	return substr($0, RSTART + 1, RLENGTH - 2)
+}
+
+!/^#/ && /sched:sched_switch/ {
+	switchcommand = get_perf_captured_variable("comm")
+
+	switchpid = get_perf_captured_variable("prev_pid")
+
+	switchtime=get_perf_timestamp()
+
+	switchstack=""
+}
+
+#
+# Strip the function name from a stack entry
+#
+# Stack entry is expected to be formatted like:
+#           c60849 MyClass::Foo(unsigned long) (/home/areece/a.out)
+#
+function get_function_name()
+{
+	# We start from 2 since we don't need the hex offset.
+	# We stop at NF - 1 since we don't need the library path.
+	funcname = $2
+	for (i = 3; i <= NF - 1; i++) {
+		funcname = funcname " " $i
+	}
+	return funcname
+}
+
+(switchpid != 0 && /^\s/) {
+	if (switchstack == "")  {
+		switchstack = get_function_name()
+	} else {
+		switchstack = get_function_name() ";" switchstack
+	}
+}
+
+(switchpid != 0 && /^$/) {
+	switch_stacks[switchpid] = switchstack
+	delete last_switch_stacks[switchpid]
+	switch_time[switchpid] = switchtime
+
+	switchpid=0
+	switchcommand=""
+	switchstack=""
+}
+
+!/^#/ && (/sched:sched_stat_sleep/ || /sched:sched_stat_blocked/) {
+	wakecommand=$1
+	wakepid=$2
+
+	waketime=get_perf_timestamp()
+
+	stat_next_command = get_perf_captured_variable("comm")
+
+	stat_next_pid = get_perf_captured_variable("pid")
+
+	stat_delay_ns = int(get_perf_captured_variable("delay"))
+
+	wakestack=""
+}
+
+(stat_next_pid != 0 && /^\s/) {
+	if (wakestack == "") {
+		wakestack = get_function_name()
+	} else {
+		# We build the wakestack in reverse order.
+		wakestack = wakestack ";" get_function_name()
+	}
+}
+
+(stat_next_pid != 0 && /^$/) {
+	#
+	# For some reason, perf appears to output duplicate
+	# sched:sched_stat_sleep and sched:sched_stat_blocked events. We only
+	# handle the first event.
+	#
+	if (stat_next_pid in switch_stacks) {
+		last_wake_time[stat_next_pid] = waketime
+
+		stack = switch_stacks[stat_next_pid]
+		if (recurse || record_wake_stack) {
+			stack = stack ";" wakestack
+			if (record_tid) {
+				stack = stack ";" wakecommand "-" wakepid
+			} else {
+				stack = stack ";" wakecommand
+			}
+		}
+
+		if (recurse) {
+			if (last_wake_time[wakepid] > last_switch_time[stat_next_pid]) {
+				stack = stack ";-;" last_switch_stacks[wakepid]
+			}
+			last_switch_stacks[stat_next_pid] = stack
+		}
+
+		delete switch_stacks[stat_next_pid]
+
+		if (record_tid) {
+			stack_times[stat_next_command "-" stat_next_pid ";" stack] += stat_delay_ns
+		} else {
+			stack_times[stat_next_command ";" stack] += stat_delay_ns
+		}
+	}
+
+	wakecommand=""
+	wakepid=0
+	stat_next_pid=0
+	stat_next_command=""
+	stat_delay_ms=0
+}
+
+END {
+	for (stack in stack_times) {
+		if (int(stack_times[stack] / 1000) > 0) {
+			print stack, int(stack_times[stack] / 1000)
+		}
+	}
+}
--- a/tests/benchmarks/_script/flamegraph/stackcollapse-perf.pl
+++ b/tests/benchmarks/_script/flamegraph/stackcollapse-perf.pl
@@ -0,0 +1,435 @@
+#!/usr/bin/perl -w
+#
+# stackcollapse-perf.pl	collapse perf samples into single lines.
+#
+# Parses a list of multiline stacks generated by "perf script", and
+# outputs a semicolon separated stack followed by a space and a count.
+# If memory addresses (+0xd) are present, they are stripped, and resulting
+# identical stacks are colased with their counts summed.
+#
+# USAGE: ./stackcollapse-perf.pl [options] infile > outfile
+#
+# Run "./stackcollapse-perf.pl -h" to list options.
+#
+# Example input:
+#
+#  swapper     0 [000] 158665.570607: cpu-clock:
+#         ffffffff8103ce3b native_safe_halt ([kernel.kallsyms])
+#         ffffffff8101c6a3 default_idle ([kernel.kallsyms])
+#         ffffffff81013236 cpu_idle ([kernel.kallsyms])
+#         ffffffff815bf03e rest_init ([kernel.kallsyms])
+#         ffffffff81aebbfe start_kernel ([kernel.kallsyms].init.text)
+#  [...]
+#
+# Example output:
+#
+#  swapper;start_kernel;rest_init;cpu_idle;default_idle;native_safe_halt 1
+#
+# Input may be created and processed using:
+#
+#  perf record -a -g -F 997 sleep 60
+#  perf script | ./stackcollapse-perf.pl > out.stacks-folded
+#
+# The output of "perf script" should include stack traces. If these are missing
+# for you, try manually selecting the perf script output; eg:
+#
+#  perf script -f comm,pid,tid,cpu,time,event,ip,sym,dso,trace | ...
+#
+# This is also required for the --pid or --tid options, so that the output has
+# both the PID and TID.
+#
+# Copyright 2012 Joyent, Inc.  All rights reserved.
+# Copyright 2012 Brendan Gregg.  All rights reserved.
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at docs/cddl1.txt or
+# http://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at docs/cddl1.txt.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# 02-Mar-2012	Brendan Gregg	Created this.
+# 02-Jul-2014	   "	  "	Added process name to stacks.
+
+use strict;
+use Getopt::Long;
+
+my %collapsed;
+
+sub remember_stack {
+	my ($stack, $count) = @_;
+	$collapsed{$stack} += $count;
+}
+my $annotate_kernel = 0; # put an annotation on kernel function
+my $annotate_jit = 0;   # put an annotation on jit symbols
+my $annotate_all = 0;   # enale all annotations
+my $include_pname = 1;	# include process names in stacks
+my $include_pid = 0;	# include process ID with process name
+my $include_tid = 0;	# include process & thread ID with process name
+my $include_addrs = 0;	# include raw address where a symbol can't be found
+my $tidy_java = 1;	# condense Java signatures
+my $tidy_generic = 1;	# clean up function names a little
+my $target_pname;	# target process name from perf invocation
+my $event_filter = "";    # event type filter, defaults to first encountered event
+my $event_defaulted = 0;  # whether we defaulted to an event (none provided)
+my $event_warning = 0;	  # if we printed a warning for the event
+
+my $show_inline = 0;
+my $show_context = 0;
+
+my $srcline_in_input = 0; # if there are extra lines with source location (perf script -F+srcline)
+GetOptions('inline' => \$show_inline,
+           'context' => \$show_context,
+           'srcline' => \$srcline_in_input,
+           'pid' => \$include_pid,
+           'kernel' => \$annotate_kernel,
+           'jit' => \$annotate_jit,
+           'all' => \$annotate_all,
+           'tid' => \$include_tid,
+           'addrs' => \$include_addrs,
+           'event-filter=s' => \$event_filter)
+or die <<USAGE_END;
+USAGE: $0 [options] infile > outfile\n
+	--pid		# include PID with process names [1]
+	--tid		# include TID and PID with process names [1]
+	--inline	# un-inline using addr2line
+	--all		# all annotations (--kernel --jit)
+	--kernel	# annotate kernel functions with a _[k]
+	--jit		# annotate jit functions with a _[j]
+	--context	# adds source context to --inline
+	--srcline	# parses output of 'perf script -F+srcline' and adds source context
+	--addrs		# include raw addresses where symbols can't be found
+	--event-filter=EVENT	# event name filter\n
+[1] perf script must emit both PID and TIDs for these to work; eg, Linux < 4.1:
+	perf script -f comm,pid,tid,cpu,time,event,ip,sym,dso,trace
+    for Linux >= 4.1:
+	perf script -F comm,pid,tid,cpu,time,event,ip,sym,dso,trace
+    If you save this output add --header on Linux >= 3.14 to include perf info.
+USAGE_END
+
+if ($annotate_all) {
+	$annotate_kernel = $annotate_jit = 1;
+}
+
+my %inlineCache;
+
+my %nmCache;
+
+sub inlineCacheAdd {
+	my ($pc, $mod, $result) = @_;
+   if (defined($inlineCache{$pc})) {
+      $inlineCache{$pc}{$mod} = $result;
+   } else {
+      $inlineCache{$pc} = {$mod => $result};
+   }
+}
+
+# for the --inline option
+sub inline {
+	my ($pc, $rawfunc, $mod) = @_;
+
+	return $inlineCache{$pc}{$mod} if defined($inlineCache{$pc}{$mod});
+
+	# capture addr2line output
+	my $a2l_output = `addr2line -a $pc -e $mod -i -f -s -C`;
+
+	# remove first line
+	$a2l_output =~ s/^(.*\n){1}//;
+
+	if ($a2l_output =~ /\?\?\n\?\?:0/) {
+		# if addr2line fails and rawfunc is func+offset, then fall back to it
+		if ($rawfunc =~ /^(.+)\+0x([0-9a-f]+)$/) {
+			my $func = $1;
+			my $addr = hex $2;
+
+			$nmCache{$mod}=`nm $mod` unless defined $nmCache{$mod};
+
+			if ($nmCache{$mod} =~ /^([0-9a-f]+) . \Q$func\E$/m) {
+			   my $base = hex $1;
+				my $newPc = sprintf "0x%x", $base+$addr;
+				my $result = inline($newPc, '', $mod);
+				inlineCacheAdd($pc, $mod, $result);
+				return $result;
+			}
+		}
+	}
+
+	my @fullfunc;
+	my $one_item = "";
+	for (split /^/, $a2l_output) {
+		chomp $_;
+
+		# remove discriminator info if exists
+		$_ =~ s/ \(discriminator \S+\)//;
+
+		if ($one_item eq "") {
+			$one_item = $_;
+		} else {
+			if ($show_context == 1) {
+				unshift @fullfunc, $one_item . ":$_";
+			} else {
+				unshift @fullfunc, $one_item;
+			}
+			$one_item = "";
+		}
+	}
+
+	my $result = join ";" , @fullfunc;
+
+	inlineCacheAdd($pc, $mod, $result);
+
+	return $result;
+}
+
+my @stack;
+my $pname;
+my $m_pid;
+my $m_tid;
+my $m_period;
+
+#
+# Main loop
+#
+while (defined($_ = <>)) {
+
+	# find the name of the process launched by perf, by stepping backwards
+	# over the args to find the first non-option (no dash):
+	if (/^# cmdline/) {
+		my @args = split ' ', $_;
+		foreach my $arg (reverse @args) {
+			if ($arg !~ /^-/) {
+				$target_pname = $arg;
+				$target_pname =~ s:.*/::;  # strip pathname
+				last;
+			}
+		}
+	}
+
+	# skip remaining comments
+	next if m/^#/;
+	chomp;
+
+	# end of stack. save cached data.
+	if (m/^$/) {
+		# ignore filtered samples
+		next if not $pname;
+
+		if ($include_pname) {
+			if (defined $pname) {
+				unshift @stack, $pname;
+			} else {
+				unshift @stack, "";
+			}
+		}
+		remember_stack(join(";", @stack), $m_period) if @stack;
+		undef @stack;
+		undef $pname;
+		next;
+	}
+
+	#
+	# event record start
+	#
+	if (/^(\S.+?)\s+(\d+)\/*(\d+)*\s+/) {
+		# default "perf script" output has TID but not PID
+		# eg, "java 25607 4794564.109216: 1 cycles:"
+		# eg, "java 12688 [002] 6544038.708352: 235 cpu-clock:"
+		# eg, "V8 WorkerThread 25607 4794564.109216: 104345 cycles:"
+		# eg, "java 24636/25607 [000] 4794564.109216: 1 cycles:"
+		# eg, "java 12688/12764 6544038.708352: 10309278 cpu-clock:"
+		# eg, "V8 WorkerThread 24636/25607 [000] 94564.109216: 100 cycles:"
+		# other combinations possible
+		my ($comm, $pid, $tid, $period) = ($1, $2, $3, "");
+		if (not $tid) {
+			$tid = $pid;
+			$pid = "?";
+		}
+
+		if (/:\s*(\d+)*\s+(\S+):\s*$/) {
+			$period = $1;
+			my $event = $2;
+
+			if ($event_filter eq "") {
+				# By default only show events of the first encountered
+				# event type. Merging together different types, such as
+				# instructions and cycles, produces misleading results.
+				$event_filter = $event;
+				$event_defaulted = 1;
+			} elsif ($event ne $event_filter) {
+				if ($event_defaulted and $event_warning == 0) {
+					# only print this warning if necessary:
+					# when we defaulted and there was
+					# multiple event types.
+					print STDERR "Filtering for events of type: $event\n";
+					$event_warning = 1;
+				}
+				next;
+			}
+		}
+
+		if (not $period) {
+			$period = 1
+		}
+		($m_pid, $m_tid, $m_period) = ($pid, $tid, $period);
+
+		if ($include_tid) {
+			$pname = "$comm-$m_pid/$m_tid";
+		} elsif ($include_pid) {
+			$pname = "$comm-$m_pid";
+		} else {
+			$pname = "$comm";
+		}
+		$pname =~ tr/ /_/;
+
+	#
+	# stack line
+	#
+	} elsif (/^\s*(\w+)\s*(.+) \((.*)\)/) {
+		# ignore filtered samples
+		next if not $pname;
+
+		my ($pc, $rawfunc, $mod) = ($1, $2, $3);
+
+		if ($show_inline == 1 && $mod !~ m/(perf-\d+.map|kernel\.|\[[^\]]+\])/) {
+			my $inlineRes = inline($pc, $rawfunc, $mod);
+			# - empty result this happens e.g., when $mod does not exist or is a path to a compressed kernel module
+			#   if this happens, the user will see error message from addr2line written to stderr
+			# - if addr2line results in "??" , then it's much more sane to fall back than produce a '??' in graph
+			if($inlineRes ne "" and $inlineRes ne "??" and $inlineRes ne "??:??:0" ) {
+				unshift @stack, $inlineRes;
+				next;
+			}
+		}
+
+		# Linux 4.8 included symbol offsets in perf script output by default, eg:
+		# 7fffb84c9afc cpu_startup_entry+0x800047c022ec ([kernel.kallsyms])
+		# strip these off:
+		$rawfunc =~ s/\+0x[\da-f]+$//;
+
+		next if $rawfunc =~ /^\(/;		# skip process names
+
+		my $is_unknown=0;
+		my @inline;
+		for (split /\->/, $rawfunc) {
+			my $func = $_;
+
+			if ($func eq "[unknown]") {
+				if ($mod ne "[unknown]") { # use module name instead, if known
+					$func = $mod;
+					$func =~ s/.*\///;
+				} else {
+					$func = "unknown";
+					$is_unknown=1;
+				}
+
+				if ($include_addrs) {
+					$func = "\[$func \<$pc\>\]";
+				} else {
+					$func = "\[$func\]";
+				}
+			}
+
+			if ($tidy_generic) {
+				$func =~ s/;/:/g;
+				if ($func !~ m/\.\(.*\)\./) {
+					# This doesn't look like a Go method name (such as
+					# "net/http.(*Client).Do"), so everything after the first open
+					# paren (that is not part of an "(anonymous namespace)") is
+					# just noise.
+					$func =~ s/\((?!anonymous namespace\)).*//;
+				}
+				# now tidy this horrible thing:
+				# 13a80b608e0a RegExp:[&<>\"\'] (/tmp/perf-7539.map)
+				$func =~ tr/"\'//d;
+				# fall through to $tidy_java
+			}
+
+			if ($tidy_java and $pname =~ m/^java/) {
+				# along with $tidy_generic, converts the following:
+				#	Lorg/mozilla/javascript/ContextFactory;.call(Lorg/mozilla/javascript/ContextAction;)Ljava/lang/Object;
+				#	Lorg/mozilla/javascript/ContextFactory;.call(Lorg/mozilla/javascript/C
+				#	Lorg/mozilla/javascript/MemberBox;.<init>(Ljava/lang/reflect/Method;)V
+				# into:
+				#	org/mozilla/javascript/ContextFactory:.call
+				#	org/mozilla/javascript/ContextFactory:.call
+				#	org/mozilla/javascript/MemberBox:.init
+				$func =~ s/^L// if $func =~ m:/:;
+			}
+
+			#
+			# Annotations
+			#
+			# detect inlined from the @inline array
+			# detect kernel from the module name; eg, frames to parse include:
+			#          ffffffff8103ce3b native_safe_halt ([kernel.kallsyms]) 
+			#          8c3453 tcp_sendmsg (/lib/modules/4.3.0-rc1-virtual/build/vmlinux)
+			#          7d8 ipv4_conntrack_local+0x7f8f80b8 ([nf_conntrack_ipv4])
+			# detect jit from the module name; eg:
+			#          7f722d142778 Ljava/io/PrintStream;::print (/tmp/perf-19982.map)
+			if (scalar(@inline) > 0) {
+				$func .= "_[i]" unless $func =~ m/\_\[i\]/;	# inlined
+			} elsif ($annotate_kernel == 1 && $mod =~ m/(^\[|vmlinux$)/ && $mod !~ /unknown/) {
+				$func .= "_[k]";	# kernel
+			} elsif ($annotate_jit == 1 && $mod =~ m:/tmp/perf-\d+\.map:) {
+				$func .= "_[j]" unless $func =~ m/\_\[j\]/;	# jitted
+			}
+
+			#
+			# Source lines
+			#
+			#
+			# Sample outputs:
+			#   | a.out 35081 252436.005167:     667783 cycles:
+			#   |                   408ebb some_method_name+0x8b (/full/path/to/a.out)
+			#   |   uniform_int_dist.h:300
+			#   |                   4069f5 main+0x935 (/full/path/to/a.out)
+			#   |   file.cpp:137
+			#   |             7f6d2148eb25 __libc_start_main+0xd5 (/lib64/libc-2.33.so)
+			#   |   libc-2.33.so[27b25]
+			#
+			#   | a.out 35081 252435.738165:     306459 cycles:
+			#   |             7f6d213c2750 [unknown] (/usr/lib64/libkmod.so.2.3.6)
+			#   |   libkmod.so.2.3.6[6750]
+			#
+			#   | a.out 35081 252435.738373:     315813 cycles:
+			#   |             7f6d215ca51b __strlen_avx2+0x4b (/lib64/libc-2.33.so)
+			#   |   libc-2.33.so[16351b]
+			#   |             7ffc71ee9580 [unknown] ([unknown])			
+			#   |
+			#
+			#   | a.out 35081 252435.718940:     247984 cycles:
+			#   |         ffffffff814f9302 up_write+0x32 ([kernel.kallsyms])
+			#   |   [kernel.kallsyms][ffffffff814f9302]
+			if($srcline_in_input and not $is_unknown){
+				$_ = <>;
+				chomp;
+				s/\[.*?\]//g;
+				s/^\s*//g;
+				s/\s*$//g;
+				$func.=':'.$_ unless $_ eq "";
+			}
+
+			push @inline, $func;
+		}
+
+		unshift @stack, @inline;
+	} else {
+		warn "Unrecognized line: $_";
+	}
+}
+
+foreach my $k (sort { $a cmp $b } keys %collapsed) {
+	print "$k $collapsed{$k}\n";
+}
--- a/tests/benchmarks/_script/flamegraph/stackcollapse-pmc.pl
+++ b/tests/benchmarks/_script/flamegraph/stackcollapse-pmc.pl
@@ -0,0 +1,74 @@
+#!/usr/bin/env perl
+#
+# Copyright (c) 2014 Ed Maste.  All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# stackcollapse-pmc.pl		collapse hwpmc samples into single lines.
+#
+# Parses a list of multiline stacks generated by "hwpmc -G", and outputs a
+# semicolon-separated stack followed by a space and a count.
+#
+# Usage:
+#   pmcstat -S unhalted-cycles -O pmc.out
+#   pmcstat -R pmc.out -z16 -G pmc.graph
+#   stackcollapse-pmc.pl pmc.graph > pmc.stack
+#
+# Example input:
+#
+# 03.07%  [17]       witness_unlock @ /boot/kernel/kernel
+#  70.59%  [12]        __mtx_unlock_flags
+#   16.67%  [2]          selfdfree
+#    100.0%  [2]           sys_poll
+#     100.0%  [2]            amd64_syscall
+#   08.33%  [1]          pmap_ts_referenced
+#    100.0%  [1]           vm_pageout
+#     100.0%  [1]            fork_exit
+# ...
+#
+# Example output:
+#
+# amd64_syscall;sys_poll;selfdfree;__mtx_unlock_flags;witness_unlock 2
+# amd64_syscall;sys_poll;pmap_ts_referenced;__mtx_unlock_flagsgeout;fork_exit 1
+# ...
+
+use warnings;
+use strict;
+
+my @stack;
+my $prev_count;
+my $prev_indent = -1;
+
+while (defined($_ = <>)) {
+	if (m/^( *)[0-9.]+%  \[([0-9]+)\]\s*(\S+)/) {
+		my $indent = length($1);
+		if ($indent <= $prev_indent) {
+			print join(';', reverse(@stack[0 .. $prev_indent])) .
+			    " $prev_count\n";
+		}
+		$stack[$indent] = $3;
+		$prev_count = $2;
+		$prev_indent = $indent;
+	}
+}
+print join(';', reverse(@stack[0 .. $prev_indent])) .  " $prev_count\n";
--- a/tests/benchmarks/_script/flamegraph/stackcollapse-recursive.pl
+++ b/tests/benchmarks/_script/flamegraph/stackcollapse-recursive.pl
@@ -0,0 +1,60 @@
+#!/usr/bin/perl -ws
+#
+# stackcollapse-recursive  Collapse direct recursive backtraces
+#
+# Post-process a stack list and merge direct recursive calls:
+#
+# Example input:
+#
+#     main;recursive;recursive;recursive;helper 1
+#
+# Output:
+#
+#     main;recursive;helper 1
+#
+# Copyright 2014 Gabriel Corona. All rights reserved.
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at docs/cddl1.txt or
+# http://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at docs/cddl1.txt.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+
+my %stacks;
+
+while(<>) {
+  chomp;
+  my ($stack_, $value) = (/^(.*)\s+?(\d+(?:\.\d*)?)$/);
+  if ($stack_) {
+    my @stack  = split(/;/, $stack_);
+
+    my @result = ();
+    my $i;
+    my $last="";
+    for($i=0; $i!=@stack; ++$i) {
+      if(!($stack[$i] eq $last)) {
+        $result[@result] = $stack[$i];
+        $last = $stack[$i];
+      }
+    }
+
+    $stacks{join(";", @result)} += $value;
+  }
+}
+
+foreach my $k (sort { $a cmp $b } keys %stacks) {
+  print "$k $stacks{$k}\n";
+}
--- a/tests/benchmarks/_script/flamegraph/stackcollapse-sample.awk
+++ b/tests/benchmarks/_script/flamegraph/stackcollapse-sample.awk
@@ -0,0 +1,231 @@
+#!/usr/bin/awk -f
+#
+# Uses MacOS' /usr/bin/sample to generate a flamegraph of a process
+#
+# Usage:
+#
+# sudo sample [pid] -file /dev/stdout | stackcollapse-sample.awk | flamegraph.pl
+#
+# Options:
+#
+# The output will show the name of the library/framework at the call-site
+# with the form AppKit`NSApplication or libsystem`start_wqthread.
+#
+# If showing the framework or library name is not required, pass
+# MODULES=0 as an argument of the sample program.
+#
+# The generated SVG will be written to the output stream, and can be piped
+# into flamegraph.pl directly, or written to a file for conversion later.
+#
+# ---
+#
+# Copyright (c) 2017, Apple Inc.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+BEGIN {
+
+  # Command line options
+  MODULES = 1       # Allows the user to enable/disable printing of modules.
+
+  # Internal variables
+  _FOUND_STACK = 0  # Found the stack traces in the output.
+  _LEVEL = -1       # The current level of indentation we are running.
+
+  # The set of symbols to ignore for 'waiting' threads, for ease of use.
+  # This will hide waiting threads from the view, making it easier to
+  # see what is actually running in the sample. These may be adjusted
+  # as necessary or appended to if other symbols need to be filtered out.
+
+  _IGNORE["libsystem_kernel`__psynch_cvwait"] = 1
+  _IGNORE["libsystem_kernel`__select"] = 1
+  _IGNORE["libsystem_kernel`__semwait_signal"] = 1
+  _IGNORE["libsystem_kernel`__ulock_wait"] = 1
+  _IGNORE["libsystem_kernel`__wait4"] = 1
+  _IGNORE["libsystem_kernel`__workq_kernreturn"] = 1
+  _IGNORE["libsystem_kernel`kevent"] = 1
+  _IGNORE["libsystem_kernel`mach_msg_trap"] = 1
+  _IGNORE["libsystem_kernel`read"] = 1
+  _IGNORE["libsystem_kernel`semaphore_wait_trap"] = 1
+
+  # The same set of symbols as above, without the module name.
+  _IGNORE["__psynch_cvwait"] = 1
+  _IGNORE["__select"] = 1
+  _IGNORE["__semwait_signal"] = 1
+  _IGNORE["__ulock_wait"] = 1
+  _IGNORE["__wait4"] = 1
+  _IGNORE["__workq_kernreturn"] = 1
+  _IGNORE["kevent"] = 1
+  _IGNORE["mach_msg_trap"] = 1
+  _IGNORE["read"] = 1
+  _IGNORE["semaphore_wait_trap"] = 1
+
+}
+
+# This is the first line in the /usr/bin/sample output that indicates the
+# samples follow subsequently. Until we see this line, the rest is ignored.
+
+/^Call graph/ {
+  _FOUND_STACK = 1
+}
+
+# This is found when we have reached the end of the stack output.
+# Identified by the string "Total number in stack (...)".
+
+/^Total number/ {
+  _FOUND_STACK = 0
+  printStack(_NEST,0)
+}
+
+# Prints the stack from FROM to TO (where FROM > TO)
+# Called when indenting back from a previous level, or at the end
+# of processing to flush the last recorded sample
+
+function printStack(FROM,TO) {
+
+  # We ignore certain blocking wait states, in the absence of being
+  # able to filter these threads from collection, otherwise
+  # we'll end up with many threads of equal length that represent
+  # the total time the sample was collected.
+  #
+  # Note that we need to collect the information to ensure that the
+  # timekeeping for the parental functions is appropriately adjusted
+  # so we just avoid printing it out when that occurs.
+  _PRINT_IT = !_IGNORE[_NAMES[FROM]]
+
+  # We run through all the names, from the root to the leaf, so that
+  # we generate a line that flamegraph.pl will like, of the form:
+  # Thread1234;example`main;example`otherFn 1234
+
+  for(l = FROM; l>=TO; l--) {
+    if (_PRINT_IT) {
+      printf("%s", _NAMES[0])
+      for(i=1; i<=l; i++) {
+        printf(";%s", _NAMES[i])
+      }
+      print " " _TIMES[l]
+    }
+
+    # We clean up our current state to avoid bugs.
+    delete _NAMES[l]
+    delete _TIMES[l]
+  }
+}
+
+# This is where we process each line, of the form:
+#  5130 Thread_8749954
+#    + 5130 start_wqthread  (in libsystem_pthread.dylib) ...
+#    +   4282 _pthread_wqthread  (in libsystem_pthread.dylib) ...
+#    +   ! 4282 __doworkq_kernreturn  (in libsystem_kernel.dylib) ...
+#    +   848 _pthread_wqthread  (in libsystem_pthread.dylib) ...
+#    +     848 __doworkq_kernreturn  (in libsystem_kernel.dylib) ...
+
+_FOUND_STACK && match($0,/^    [^0-9]*[0-9]/) {
+
+  # We maintain two counters:
+  #   _LEVEL: the high water mark of the indentation level we have seen.
+  #   _NEST:  the current indentation level.
+  #
+  # We keep track of these two levels such that when the nesting level
+  # decreases, we print out the current state of where we are.
+
+  _NEST=(RLENGTH-5)/2
+  sub(/^[^0-9]*/,"") # Normalise the leading content so we start with time.
+  _TIME=$1           # The time recorded by 'sample', first integer value.
+
+  # The function name is in one or two parts, depending on what kind of
+  # function it is.
+  #
+  # If it is a standard C or C++ function, it will be of the form:
+  #  exampleFunction
+  #  Example::Function
+  #
+  # If it is an Objective-C funtion, it will be of the form:
+  #  -[NSExample function]
+  #  +[NSExample staticFunction]
+  #  -[NSExample function:withParameter]
+  #  +[NSExample staticFunction:withParameter:andAnother]
+
+  _FN1 = $2
+  _FN2 = $3
+
+  # If it is a standard C or C++ function then the following word will
+  # either be blank, or the text '(in', so we jut use the first one:
+
+  if (_FN2 == "(in" || _FN2 == "") {
+    _FN =_FN1
+  } else {
+    # Otherwise we concatenate the first two parts with .
+    _FN = _FN1 "." _FN2
+  }
+
+  # Modules are shown with '(in libfoo.dylib)' or '(in AppKit)'
+
+  _MODULE = ""
+  match($0, /\(in [^)]*\)/)
+
+  if (RSTART > 0 && MODULES) {
+
+    # Strip off the '(in ' (4 chars) and the final ')' char (1 char)
+    _MODULE = substr($0, RSTART+4, RLENGTH-5)
+
+    # Remove the .dylib function, since it adds no value.
+    gsub(/\.dylib/, "", _MODULE)
+
+    # The function name is 'module`functionName'
+    _FN = _MODULE "`" _FN
+  }
+
+  # Now we have set up the variables, we can decide how to apply it
+  # If we are descending in the nesting, we don't print anything out:
+  # a
+  # ab
+  # abc
+  #
+  # We only print out something when we go back a level, or hit the end:
+  # abcd
+  # abe < prints out the stack up until this point, i.e. abcd
+
+  # We store a pair of arrays, indexed by the nesting level:
+  #
+  #  _TIMES - a list of the time reported to that function
+  #  _NAMES - a list of the function names for each current stack trace
+
+  # If we are backtracking, we need to flush the current output.
+  if (_NEST <= _LEVEL) {
+    printStack(_LEVEL,_NEST)
+  }
+
+  # Record the name and time of the function where we are.
+  _NAMES[_NEST] = _FN
+  _TIMES[_NEST] = _TIME
+
+  # We subtract the time we took from our parent so we don't double count.
+  if (_NEST > 0) {
+    _TIMES[_NEST-1] -= _TIME
+  }
+
+  # Raise the high water mark of the level we have reached.
+  _LEVEL = _NEST
+}
--- a/tests/benchmarks/_script/flamegraph/stackcollapse-stap.pl
+++ b/tests/benchmarks/_script/flamegraph/stackcollapse-stap.pl
@@ -0,0 +1,84 @@
+#!/usr/bin/perl -w
+#
+# stackcollapse-stap.pl	collapse multiline SystemTap stacks
+#				into single lines.
+#
+# Parses a multiline stack followed by a number on a separate line, and
+# outputs a semicolon separated stack followed by a space and the number.
+# If memory addresses (+0xd) are present, they are stripped, and resulting
+# identical stacks are colased with their counts summed.
+#
+# USAGE: ./stackcollapse.pl infile > outfile
+#
+# Example input:
+#
+#  0xffffffff8103ce3b : native_safe_halt+0xb/0x10 [kernel]
+#  0xffffffff8101c6a3 : default_idle+0x53/0x1d0 [kernel]
+#  0xffffffff81013236 : cpu_idle+0xd6/0x120 [kernel]
+#  0xffffffff815bf03e : rest_init+0x72/0x74 [kernel]
+#  0xffffffff81aebbfe : start_kernel+0x3ba/0x3c5 [kernel]
+#	2404
+#
+# Example output:
+#
+#  start_kernel;rest_init;cpu_idle;default_idle;native_safe_halt 2404
+#
+# Input may contain many stacks as generated from SystemTap.
+#
+# Copyright 2011 Joyent, Inc.  All rights reserved.
+# Copyright 2011 Brendan Gregg.  All rights reserved.
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at docs/cddl1.txt or
+# http://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at docs/cddl1.txt.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# 16-Feb-2012	Brendan Gregg	Created this.
+
+use strict;
+
+my %collapsed;
+
+sub remember_stack {
+	my ($stack, $count) = @_;
+	$collapsed{$stack} += $count;
+}
+
+my @stack;
+
+foreach (<>) {
+	chomp;
+
+	if (m/^\s*(\d+)+$/) {
+		remember_stack(join(";", @stack), $1);
+		@stack = ();
+		next;
+	}
+
+	next if (m/^\s*$/);
+
+	my $frame = $_;
+	$frame =~ s/^\s*//;
+	$frame =~ s/\+[^+]*$//;
+	$frame =~ s/.* : //;
+	$frame = "-" if $frame eq "";
+	unshift @stack, $frame;
+}
+
+foreach my $k (sort { $a cmp $b } keys %collapsed) {
+	printf "$k $collapsed{$k}\n";
+}
--- a/tests/benchmarks/_script/flamegraph/stackcollapse-vsprof.pl
+++ b/tests/benchmarks/_script/flamegraph/stackcollapse-vsprof.pl
@@ -0,0 +1,98 @@
+#!/usr/bin/perl -w
+#
+# stackcollapse-vsprof.pl
+#
+# Parses the CSV file containing a call tree from a visual studio profiler and produces an output suitable for flamegraph.pl.
+#
+# USAGE: perl stackcollapse-vsprof.pl infile > outfile
+#
+# WORKFLOW:
+#
+# This example assumes you have visual studio 2015 installed.
+# 
+# 1. Profile C++ your application using visual studio
+# 2. On visual studio, choose export the call tree as csv
+# 3. Generate a flamegraph: perl stackcollapse-vsprof CallTreeSummary.csv | perl flamegraph.pl > result_vsprof.svg
+#
+# INPUT EXAMPLE :
+#
+# Level,Function Name,Inclusive Samples,Exclusive Samples,Inclusive Samples %,Exclusive Samples %,Module Name,
+# 1,"main","8,735",0,100.00,0.00,"an_executable.exe",
+# 2,"testing::UnitTest::Run","8,735",0,100.00,0.00,"an_executable.exe",
+# 3,"boost::trim_end_iter_select<std::iterator<std::val<std::types<char> > >,boost::is_classifiedF>",306,16,3.50,0.18,"an_executable.exe",
+#
+# OUTPUT EXAMPLE :
+#
+# main;testing::UnitTest::Run;boost::trim_end_iter_select<std::iterator<std::val<std::types<char>>>,boost::is_classifiedF> 306
+
+use strict;
+
+sub massage_function_names;
+sub parse_integer;
+sub print_stack_trace;
+
+# data initialization
+my @stack = ();
+my $line_number = 0;
+my $previous_samples = 0;
+
+my $num_args = $#ARGV + 1;
+if ($num_args != 1) {
+  print "$ARGV[0]\n";
+  print "Usage : stackcollapse-vsprof.pl <in.cvs> > out.txt\n";
+  exit;
+}
+
+my $input_csv_file = $ARGV[0];
+my $line_parser_rx = qr{
+  ^\s*(\d+?),            # level in the stack
+  ("[^"]+" | [^,]+),     # function name (beware of spaces)
+  ("[^"]+" | [^,]+),     # number of samples (beware of locale number formatting)
+}ox;
+
+open(my $fh, '<', $input_csv_file) or die "Can't read file '$input_csv_file' [$!]\n";
+
+while (my $current_line = <$fh>){
+  $line_number = $line_number + 1;
+
+  # to discard first line which typically contains headers
+  next if $line_number == 1;
+  next if $current_line =~ /^\s*$/o;
+ 
+  ($current_line =~ $line_parser_rx) or die "Error in regular expression at line $line_number : $current_line\n";
+
+  my $level = int $1;
+  my $function = massage_function_names($2);
+  my $samples = parse_integer($3);
+  my $stack_len = @stack;
+ 
+  #print "[DEBUG] $line_number : $level $function $samples $stack_len\n";
+
+  next if not $level;
+  ($level <= $stack_len + 1) or die "Error in stack at line $line_number : $current_line\n";
+
+  if ($level <= $stack_len) {
+		print_stack_trace(\@stack, $previous_samples);
+    my $to_remove = $level - $stack_len - 1;
+    splice(@stack, $to_remove);
+  }
+
+  $stack_len < 1000 or die "Stack overflow at line $line_number";
+  push(@stack, $function);
+  $previous_samples = $samples;
+}
+print_stack_trace(\@stack, $previous_samples);
+
+sub massage_function_names {
+  return ($_[0] =~ s/\s*|^"|"$//gro);
+}
+
+sub parse_integer {
+  return int ($_[0] =~ s/[., ]|^"|"$//gro);
+}
+
+sub print_stack_trace {
+  my ($stack_ref, $sample) = @_;
+	my $stack_trace = join(";", @$stack_ref);
+	print "$stack_trace $sample\n";
+}
--- a/tests/benchmarks/_script/flamegraph/stackcollapse-vtune-mc.pl
+++ b/tests/benchmarks/_script/flamegraph/stackcollapse-vtune-mc.pl
@@ -0,0 +1,103 @@
+#!/usr/bin/perl -w
+#
+# stackcollapse-vtune-mc.pl
+#
+# Parses the CSV file containing a call tree from Intel VTune memory-consumption profiler and produces an output suitable for flamegraph.pl.
+#
+# USAGE: perl stackcollapse-vtune-mc.pl [options] infile > outfile
+#
+# WORKFLOW:
+#
+# This assumes you have Intel VTune installed and on path (using Command Line)
+#
+# 1. Profile C++ application tachyon (example shipped with Intel VTune 2019):
+#
+#    amplxe-cl -collect memory-consumption -r mc_tachyon -- ./tachyon
+#
+# 2. Export raw VTune data to csv file:
+#    ### for Intel VTune 2019
+#    amplxe-cl -R top-down -call-stack-mode all \
+#			-column="Allocations:Self","Allocation Size:Self","Module" \
+#			-report-out allocations.csv -format csv \
+#			-csv-delimiter comma -r mc_tachyon
+#
+# 3. Generate a flamegraph:
+#    ## Generate for allocations amount.
+#    perl stackcollapse-vtune-mc.pl allocations.csv > out.folded
+#    perl flamegraph.pl --countname=allocations out.folded > vtune_tachyon_mc.svg
+#
+#    ## Or you can generate for allocation size in bytes.
+#    perl stackcollapse-vtune-mc.pl -s allocations.csv > out.folded
+#    perl flamegraph.pl --countname=allocations out.folded > vtune_tachyon_mc_size.svg
+#
+# AUTHOR: Rohith Bakkannagari
+# 27-Nov-2019	UnpluggedCoder		Forked from stackcollapse-vtune.pl, for memory-consumption flamegraph
+
+use strict;
+use Getopt::Long;
+
+sub usage {
+	die <<USAGE_END;
+Usage : $0 [options] allocations.csv > out.folded\n
+	--size		# Accumulate allocation size in bytes instead of allocation counts.\n
+NOTE : The csv file should exported by `amplxe-cl` tool with the exact -column parameter shows below.
+	amplxe-cl -R top-down -call-stack-mode all \
+		-column="Allocations:Self","Allocation Size:Self","Module" \
+		-report-out allocations.csv -format csv \
+		-csv-delimiter comma -r mc_tachyon
+USAGE_END
+}
+
+# data initialization
+my @stack = ();
+my $rowCounter = 0; # flag for row number
+
+my $accSize = '';
+GetOptions ('size' => \$accSize)
+or usage();
+
+my $numArgs = $#ARGV + 1;
+if ($numArgs != 1){
+	usage();
+	exit;
+}
+
+my $inputCSVFile = $ARGV[0];
+open(my $fh, '<', $inputCSVFile) or die "Can't read file '$inputCSVFile' [$!]\n";
+
+while (my $currLine = <$fh>){
+	# discard warning line
+	next if $rowCounter == 0 && rindex($currLine, "war:", 0) == 0;
+	$rowCounter = $rowCounter + 1;
+	# to discard first row which typically contains headers
+	next if $rowCounter == 1;
+	chomp $currLine;
+	#VTune - sometimes the call stack information is enclosed in double quotes (?).  To remove double quotes.
+	$currLine =~ s/\"//g;
+
+	### for Intel VTune 2019
+	### CSV header should be like below
+	### Function Stack,Allocation Size:Self,Deallocation Size:Self,Allocations:Self,Module
+	$currLine =~ /(\s*)(.*?),([0-9]*?\.?[0-9]*?),([0-9]*?\.?[0-9]*?),([0-9]*?\.?[0-9]*?),(.*)/ or die "Error in regular expression on the current line $currLine\n";
+	my $func = $2.'('.$6.')';	# function(module)
+	my $depth = length ($1);
+	my $allocBytes = $3; 	# allocation size
+	my $allocs = $5; 		# allocations
+
+	my $tempString = '';
+	$stack [$depth] = $func;
+	if ($accSize){
+		next if $allocBytes eq '';
+		foreach my $i (0 .. $depth - 1) {
+			$tempString = $tempString.$stack[$i].";";
+		}
+		$tempString = $tempString.$func." $allocBytes\n";
+	} else {
+		next if $allocs == 0;
+		foreach my $i (0 .. $depth - 1) {
+			$tempString = $tempString.$stack[$i].";";
+		}
+		$tempString = $tempString.$func." $allocs\n";
+	}
+	print "$tempString";
+}
--- a/tests/benchmarks/_script/flamegraph/stackcollapse-vtune.pl
+++ b/tests/benchmarks/_script/flamegraph/stackcollapse-vtune.pl
@@ -0,0 +1,97 @@
+#!/usr/bin/perl -w
+#
+# stackcollapse-vtune.pl
+#
+# Parses the CSV file containing a call tree from Intel VTune hotspots profiler and produces an output suitable for flamegraph.pl.
+#
+# USAGE: perl stackcollapse-vtune.pl infile > outfile
+#
+# WORKFLOW:
+#
+# This assumes you have Intel VTune installed and on path (using Command Line)
+#
+# 1. Profile C++ application tachyon_find_hotspots (example shipped with Intel VTune 2013):
+#
+#    amplxe-cl -collect hotspots -r result_vtune_tachyon -- ./tachyon_find_hotspots
+#
+# 2. Export raw VTune data to csv file:
+#
+##### VTune 2013 & 2015
+#   amplxe-cl -R top-down -report-out result_vtune_tachyon.csv -filter "Function Stack" -format csv -csv-delimiter comma -r result_vtune_tachyon
+#### VTune 2016
+#		amplxe-cl.exe -R top-down -call-stack-mode all -column="CPU Time:Self","Module" -report-output result_vtune_tachyon.csv -filter "Function Stack" -format csv -csv-delimiter comma -r result_vtune_tachyon
+#
+# 3. Generate a flamegraph:
+#
+#    perl stackcollapse-vtune result_vtune_tachyon.csv | perl flamegraph.pl > result_vtune_tachyon.svg
+#
+# AUTHOR: Rohith Bakkannagari
+
+use strict;
+
+# data initialization
+my @stack = ();
+my $rowCounter = 0; #flag for row number
+
+my $numArgs = $#ARGV + 1;
+if ($numArgs != 1)
+{
+print "$ARGV[0]\n";
+print "Usage : stackcollapse-vtune.pl <out.cvs> > out.txt\n";
+exit;
+}
+
+my $inputCSVFile = $ARGV[0];
+my $funcOnly = '';
+my $depth = 0;
+my $selfTime = 0;
+my $dllName = '';
+
+open(my $fh, '<', $inputCSVFile) or die "Can't read file '$inputCSVFile' [$!]\n";
+
+while (my $currLine = <$fh>){
+	$rowCounter = $rowCounter + 1;
+	# to discard first row which typically contains headers
+	next if $rowCounter == 1;
+	chomp $currLine;
+
+	### VTune 2013 & 2015
+	#VTune - sometimes the call stack information is enclosed in double quotes (?).  To remove double quotes.  Not necessary for XCode instruments (MAC)
+	$currLine =~ s/\"//g;
+	$currLine =~ /(\s*)(.*),(.*),.*,([0-9]*\.?[0-9]+)/ or die "Error in regular expression on the current line\n";
+	$dllName = $3;
+	$func = $dllName.'!'.$2; # Eg : m_lxe.dll!MathWorks::lxe::IrEngineDecorator::Apply
+	$depth = length ($1);
+	$selfTime = $4*1000; # selfTime in msec
+	### VTune 2013 & 2015
+
+	### VTune 2016
+	# $currLine =~ /(\s*)(.*?),([0-9]*\.?[0-9]+?),(.*)/ or die "Error in regular expression on the current line $currLine\n";
+	#  if ($2 =~ /\"/)
+	#  {
+	# 	$currLine =~ /(\s*)\"(.*?)\",([0-9]*\.?[0-9]+?),(.*)/ or die "Error in regular expression on the current line $currLine\n";
+	#  	$funcOnly = $2;
+	#  	$depth = length ($1);
+	#  	$selfTime = $3*1000; # selfTime in msec
+	#  	$dllName = $4;
+	#  }
+	#  else
+	#  {
+	#  	$funcOnly = $2;
+	#  	$depth = length ($1);
+	#  	$selfTime = $3*1000; # selfTime in msec
+	#  	$dllName = $4;
+	#  }
+	#  my $func = $dllName.'!'.$funcOnly; # Eg : m_lxe.dll!MathWorks::lxe::IrEngineDecorator::Apply
+	 ### VTune 2016
+
+	my $tempString = '';
+	$stack [$depth] = $func;
+	foreach my $i (0 .. $depth - 1) {
+		$tempString = $tempString.$stack[$i].";";
+	}
+	$tempString = $tempString.$func." $selfTime\n";
+	if ($selfTime != 0){
+		print "$tempString";
+	}
+}
--- a/tests/benchmarks/_script/flamegraph/stackcollapse-wcp.pl
+++ b/tests/benchmarks/_script/flamegraph/stackcollapse-wcp.pl
@@ -0,0 +1,69 @@
+#!/usr/bin/perl -ws
+#
+# stackcollapse-wcp  Collapse wallClockProfiler backtraces
+#
+# Parse a list of GDB backtraces as generated by https://github.com/jasonrohrer/wallClockProfiler
+#
+# Copyright 2014 Gabriel Corona. All rights reserved.
+# Portions Copyright 2020 Ștefan Talpalaru <stefantalpalaru@yahoo.com>
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at docs/cddl1.txt or
+# http://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at docs/cddl1.txt.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+
+use strict;
+
+my $current = "";
+my $start_processing = 0;
+my $samples = 0;
+my %stacks;
+
+while(<>) {
+  s/^\s+|\s+$//g;
+
+  if (m/^Full stacks/) {
+    $start_processing = 1;
+    next;
+  }
+
+  if (not $start_processing) {
+      next;
+  }
+
+  if(m/^\d+\.\d+% =+ \((\d+) samples\)/) {
+    # 99.791% ===================================== (17194 samples)
+    $samples = $1;
+    next;
+  } elsif (m/^\d+: (.*)$/) {
+    # 1: poll__YNjd8fE6xG8CRNwfLnrx0g_2   (at /mnt/sde1/storage/nim-beacon-chain-clean/vendor/nim-chronos/chronos/asyncloop.nim:343)
+    my $function = $1;
+    if ($current eq "") {
+      $current = $function;
+    } else {
+      $current = $function . ";" . $current;
+    }
+  } elsif (m/^$/ and $current ne "") {
+    $stacks{$current} += $samples;
+    $current = "";
+  }
+}
+
+foreach my $k (sort { $a cmp $b } keys %stacks) {
+  print "$k $stacks{$k}\n";
+}
+
--- a/tests/benchmarks/_script/flamegraph/stackcollapse-xdebug.php
+++ b/tests/benchmarks/_script/flamegraph/stackcollapse-xdebug.php
@@ -0,0 +1,197 @@
+#!/usr/bin/php
+<?php
+#
+# Copyright 2018 Miriam Lauter (lauter.miriam@gmail.com).  All rights reserved.
+#
+#  This program is free software; you can redistribute it and/or
+#  modify it under the terms of the GNU General Public License
+#  as published by the Free Software Foundation; either version 2
+#  of the License, or (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program; if not, write to the Free Software Foundation,
+#  Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+#
+#  (http://www.gnu.org/copyleft/gpl.html)
+#
+# 13-Apr-2018   Miriam Lauter   Created this.
+
+ini_set('error_log', null);
+$optind = null;
+$args = getopt("htc", ["help"], $optind);
+if (isset($args['h']) || isset($args['help'])) {
+    usage();
+}
+
+function usage($exit = 0) {
+    echo <<<EOT
+stackcollapse-xdebug.php  collapse php function traces into single lines.
+
+Parses php samples generated by xdebug with xdebug.trace_format = 1
+and outputs stacks as single lines, with methods separated by semicolons,
+and then a space and an occurrence count. For use with flamegraph.pl.
+See https://github.com/brendangregg/FlameGraph.
+
+USAGE: ./stackcollapse-xdebug.php [OPTIONS] infile > outfile
+    -h --help    Show this message
+    -t           Weight stack counts by duration using the time index in the trace (default)
+    -c           Invocation counts only. Simply count stacks in the trace and sum duplicates, don't weight by duration.
+
+Example input:
+For more info on xdebug and generating traces see
+https://xdebug.org/docs/execution_trace.
+
+Version: 2.0.0RC4-dev
+TRACE START [2007-05-06 18:29:01]
+1    0    0    0.010870    114112    {main}    1    ../trace.php    0
+2    1    0    0.032009    114272    str_split    0    ../trace.php    8
+2    1    1    0.032073    116632
+2    2    0    0.033505    117424    ret_ord    1    ../trace.php    10
+3    3    0    0.033531    117584    ord    0    ../trace.php    5
+3    3    1    0.033551    117584
+...
+TRACE END   [2007-05-06 18:29:01]
+
+Example output:
+
+- c
+{main};str_split 1
+{main};ret_ord;ord 6
+
+-t
+{main} 23381
+{main};str_split 64
+{main};ret_ord 215
+{main};ret_ord;ord 106
+
+EOT;
+
+    exit($exit);
+}
+
+function collapseStack(array $stack, string $func_name_key): string {
+    return implode(';', array_column($stack, $func_name_key));
+}
+
+function addCurrentStackToStacks(array $stack, float $dur, array &$stacks) {
+    $collapsed      = implode(';', $stack);
+    $duration       = SCALE_FACTOR * $dur;
+
+    if (array_key_exists($collapsed, $stacks)) {
+        $stacks[$collapsed] += $duration;
+    } else {
+        $stacks[$collapsed] = $duration;
+    }
+}
+
+function isEOTrace(string $l) {
+    $pattern = "/^(\\t|TRACE END)/";
+    return preg_match($pattern, $l);
+}
+
+$filename = $argv[$optind] ?? null;
+if ($filename === null) {
+    usage(1);
+}
+
+$do_time = !isset($args['c']);
+
+// First make sure our file is consistently formatted with only one \t delimiting each field
+$out = [];
+$retval = null;
+exec("sed -in 's/\t\+/\t/g' " . escapeshellarg($filename), $out, $retval);
+if ($retval !== 0) {
+    usage(1);
+}
+
+$handle = fopen($filename, 'r');
+
+if ($handle === false) {
+    echo "Unable to open $filename \n\n";
+    usage(1);
+}
+
+// Loop till we find TRACE START
+while ($l = fgets($handle)) {
+    if (strpos($l, "TRACE START") === 0) {
+        break;
+    }
+}
+
+const SCALE_FACTOR = 1000000;
+$stacks = [];
+$current_stack = [];
+$was_exit = false;
+$prev_start_time = 0;
+
+if ($do_time) {
+    // Weight counts by duration
+    // Xdebug trace time indices have 6 sigfigs of precision
+    // We have a perfect trace, but let's instead pretend that
+    // this was collected by sampling at 10^6 Hz
+    // then each millionth of a second this stack took to execute is 1 count
+    while ($l = fgets($handle)) {
+        if (isEOTrace($l)) {
+            break;
+        }
+
+        $parts = explode("\t", $l);
+        list($level, $fn_no, $is_exit, $time) = $parts;
+
+        if ($is_exit) {
+            if (empty($current_stack)) {
+                echo "[WARNING] Found function exit without corresponding entrance. Discarding line. Check your input.\n";
+                continue;
+            }
+
+            addCurrentStackToStacks($current_stack, $time - $prev_start_time, $stacks);
+            array_pop($current_stack);
+        } else {
+            $func_name = $parts[5];
+
+            if (!empty($current_stack)) {
+                addCurrentStackToStacks($current_stack, $time - $prev_start_time, $stacks);
+            }
+
+            $current_stack[] = $func_name;
+        }
+        $prev_start_time = $time;
+    }
+} else {
+    // Counts only
+    while ($l = fgets($handle)) {
+        if (isEOTrace($l)) {
+            break;
+        }
+
+        $parts = explode("\t", $l);
+        list($level, $fn_no, $is_exit) = $parts;
+
+        if ($is_exit === "1") {
+            if (!$was_exit) {
+                $collapsed = implode(";", $current_stack);
+                if (array_key_exists($collapsed, $stacks)) {
+                    $stacks[$collapsed]++;
+                } else {
+                    $stacks[$collapsed] = 1;
+                }
+            }
+
+            array_pop($current_stack);
+            $was_exit = true;
+        } else {
+            $func_name = $parts[5];
+            $current_stack[] = $func_name;
+            $was_exit = false;
+        }
+    }
+}
+
+foreach ($stacks as $stack => $count) {
+    echo "$stack $count\n";
+}
--- a/tests/benchmarks/_script/flamegraph/stackcollapse.pl
+++ b/tests/benchmarks/_script/flamegraph/stackcollapse.pl
@@ -0,0 +1,109 @@
+#!/usr/bin/perl -w
+#
+# stackcollapse.pl	collapse multiline stacks into single lines.
+#
+# Parses a multiline stack followed by a number on a separate line, and
+# outputs a semicolon separated stack followed by a space and the number.
+# If memory addresses (+0xd) are present, they are stripped, and resulting
+# identical stacks are colased with their counts summed.
+#
+# USAGE: ./stackcollapse.pl infile > outfile
+#
+# Example input:
+#
+#  unix`i86_mwait+0xd
+#  unix`cpu_idle_mwait+0xf1
+#  unix`idle+0x114
+#  unix`thread_start+0x8
+#  1641
+#
+# Example output:
+#
+#  unix`thread_start;unix`idle;unix`cpu_idle_mwait;unix`i86_mwait 1641
+#
+# Input may contain many stacks, and can be generated using DTrace.  The
+# first few lines of input are skipped (see $headerlines).
+#
+# Copyright 2011 Joyent, Inc.  All rights reserved.
+# Copyright 2011 Brendan Gregg.  All rights reserved.
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at docs/cddl1.txt or
+# http://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at docs/cddl1.txt.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# 14-Aug-2011	Brendan Gregg	Created this.
+
+use strict;
+
+my $headerlines = 3;		# number of input lines to skip
+my $includeoffset = 0;		# include function offset (except leafs)
+my %collapsed;
+
+sub remember_stack {
+	my ($stack, $count) = @_;
+	$collapsed{$stack} += $count;
+}
+
+my $nr = 0;
+my @stack;
+
+foreach (<>) {
+	next if $nr++ < $headerlines;
+	chomp;
+
+	if (m/^\s*(\d+)+$/) {
+		my $count = $1;
+		my $joined = join(";", @stack);
+
+		# trim leaf offset if these were retained:
+		$joined =~ s/\+[^+]*$// if $includeoffset;
+
+		remember_stack($joined, $count);
+		@stack = ();
+		next;
+	}
+
+	next if (m/^\s*$/);
+
+	my $frame = $_;
+	$frame =~ s/^\s*//;
+	$frame =~ s/\+[^+]*$// unless $includeoffset;
+
+	# Remove arguments from C++ function names:
+	$frame =~ s/(::.*)[(<].*/$1/;
+
+	$frame = "-" if $frame eq "";
+
+        my @inline;
+        for (split /\->/, $frame) {
+            my $func = $_;
+
+            # Strip out L and ; included in java stacks
+            $func =~ tr/\;/:/;
+            $func =~ s/^L//;
+            $func .= "_[i]" if scalar(@inline) > 0; #inlined
+
+            push @inline, $func;
+        }
+
+	unshift @stack, @inline;
+}
+
+foreach my $k (sort { $a cmp $b } keys %collapsed) {
+	print "$k $collapsed{$k}\n";
+}
--- a/tests/benchmarks/_script/flamegraph/test.sh
+++ b/tests/benchmarks/_script/flamegraph/test.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+#
+# test.sh - Check flame graph software vs test result files.
+#
+# This is used to detect regressions in the flame graph software.
+# See record-test.sh, which refreshes these files after intended software
+# changes.
+#
+# Currently only tests stackcollapse-perf.pl.
+
+set -euo pipefail
+set -x
+set -v
+
+# ToDo: add some form of --inline, and --inline --context tests. These are
+# tricky since they use addr2line, whose output will vary based on the test
+# system's binaries and symbol tables.
+for opt in pid tid kernel jit all addrs; do
+  for testfile in test/*.txt ; do
+    echo testing $testfile : $opt
+    outfile=${testfile#*/}
+    outfile=test/results/${outfile%.txt}"-collapsed-${opt}.txt"
+    perl ./stackcollapse-perf.pl --"${opt}" "${testfile}" 2> /dev/null | diff -u - "${outfile}"
+    perl ./flamegraph.pl "${outfile}" > /dev/null
+  done
+done
--- a/tests/benchmarks/benchmark.sh
+++ b/tests/benchmarks/benchmark.sh
@@ -6,13 +6,14 @@ set -eo pipefail
 # parse the command line
 #

-usage() { echo "usage: $(basename "$0") [--cli <path>] [--name <cli-name>] [--baseline-cli <path>] [--suite <suite>] [--json <path>] [--zip <path>] [--verbose] [--debug]"; }
+usage() { echo "usage: $(basename "$0") [--cli <path>] [--name <cli-name>] [--baseline-cli <path>] [--suite <suite>] [--json <path>] [--flamegraph] [--zip <path>] [--verbose] [--debug]"; }

 TEST_CLI="git"
 TEST_CLI_NAME=
 BASELINE_CLI=
 SUITE=
 JSON_RESULT=
+FLAMEGRAPH=
 ZIP_RESULT=
 OUTPUT_DIR=
 VERBOSE=
@@ -66,6 +67,8 @@ for a in "$@"; do
 		NEXT="json"
 	elif [[ "${a}" == "-j"* ]]; then
 		JSON_RESULT="${a/-j/}"
+	elif [ "${a}" = "-F" ] || [ "${a}" == "--flamegraph" ]; then
+		FLAMEGRAPH=1
 	elif [ "${a}" = "-z" ] || [ "${a}" == "--zip" ]; then
 		NEXT="zip"
 	elif [[ "${a}" == "-z"* ]]; then
@@ -209,8 +212,9 @@ for TEST_PATH in "${BENCHMARK_DIR}"/*; do
 	fi

 	OUTPUT_FILE="${OUTPUT_DIR}/${TEST_FILE}.out"
-	JSON_FILE="${OUTPUT_DIR}/${TEST_FILE}.json"
 	ERROR_FILE="${OUTPUT_DIR}/${TEST_FILE}.err"
+	JSON_FILE="${OUTPUT_DIR}/${TEST_FILE}.json"
+	FLAMEGRAPH_FILE="${OUTPUT_DIR}/${TEST_FILE}.svg"

 	FAILED=
 	{
@@ -248,15 +252,54 @@ for TEST_PATH in "${BENCHMARK_DIR}"/*; do
 				two_mean=$(humanize_secs "${two_mean}")
 				two_stddev=$(humanize_secs "${two_stddev}")

-				echo "${one_mean} ± ${one_stddev}  vs  ${two_mean} ± ${two_stddev}"
+				echo -n "${one_mean} ± ${one_stddev}  vs  ${two_mean} ± ${two_stddev}"
 			else
-				echo "${one_mean} ± ${one_stddev}"
+				echo -n "${one_mean} ± ${one_stddev}"
 			fi
 		done
 	fi

 	# add our metadata to the hyperfine json result
 	jq ". |= { \"name\": \"${TEST_NAME}\" } + ." < "${JSON_FILE}" > "${JSON_FILE}.new" && mv "${JSON_FILE}.new" "${JSON_FILE}"
+
+	# run with flamegraph output if requested
+	if [ "${FLAMEGRAPH}" ]; then
+		PROFILER_OUTPUT_FILE="${OUTPUT_DIR}/${TEST_FILE}-profiler.out"
+		PROFILER_ERROR_FILE="${OUTPUT_DIR}/${TEST_FILE}-profiler.err"
+
+		if [ "${VERBOSE}" = "1" ]; then
+			echo "  Profiling and creating flamegraph ..."
+		else
+			echo -n "  --  profiling..."
+		fi
+
+		RESULT=
+		{ ${TEST_PATH} --cli "${TEST_CLI}" --profile --flamegraph "${FLAMEGRAPH_FILE}" >>"${PROFILER_OUTPUT_FILE}" 2>>"${PROFILER_ERROR_FILE}" || RESULT=$?; }
+
+		if [ "${VERBOSE}" = "1" ]; then
+			indent < "${PROFILER_OUTPUT_FILE}"
+			indent < "${PROFILER_ERROR_FILE}"
+		else
+			# error code 2 indicates a non-fatal error creating
+			# the flamegraph
+			if [ "${RESULT}" = "" -o "${RESULT}" = "0" ]; then
+				echo " done."
+			elif [ "${RESULT}" = "2" ]; then
+				echo " missing resources."
+			elif [ "${RESULT}" = "3" ]; then
+				echo " sample too small."
+
+				indent < "${PROFILER_ERROR_FILE}"
+			elif [ "${RESULT}" = "4" ]; then
+				echo " unavailable."
+			else
+				echo " failed."
+
+				indent < "${PROFILER_ERROR_FILE}"
+				ANY_FAILED=1
+			fi
+		fi
+	fi
 done

 TIME_END=$(time_in_ms)
@@ -308,6 +351,7 @@ if [ "$CLEANUP_DIR" = "1" ]; then
 	rm -f "${OUTPUT_DIR}"/*.out
 	rm -f "${OUTPUT_DIR}"/*.err
 	rm -f "${OUTPUT_DIR}"/*.json
+	rm -f "${OUTPUT_DIR}"/*.svg
 	rmdir "${OUTPUT_DIR}"
 fi

--- a/tests/benchmarks/benchmark_helpers.sh
+++ b/tests/benchmarks/benchmark_helpers.sh
@@ -7,15 +7,17 @@ set -eo pipefail
 # command-line parsing
 #

-usage() { echo "usage: $(basename "$0") [--cli <path>] [--baseline-cli <path>] [--output-style <style>] [--json <path>]"; }
+usage() { echo "usage: $(basename "$0") [--cli <path>] [--baseline-cli <path>] [--output-style <style>] [--json <path>] [--profile] [--flamegraph <path>]"; }

 NEXT=
 BASELINE_CLI=
 TEST_CLI="git"
-JSON=
 SHOW_OUTPUT=
+JSON=
+PROFILE=
+FLAMEGRAPH=

-if [ "$CI" != "" ]; then
+if [ "$CI" != "" -a -t 1 ]; then
 	OUTPUT_STYLE="color"
 else
 	OUTPUT_STYLE="auto"
@@ -23,6 +25,9 @@ fi

 HELP_GIT_REMOTE="https://github.com/git/git"
 HELP_LINUX_REMOTE="https://github.com/torvalds/linux"
+HELP_RESOURCE_REPO="https://github.com/libgit2/benchmark-resources"
+
+BENCHMARK_DIR=${BENCHMARK_DIR:=$(dirname "$0")}

 #
 # parse the arguments to the outer script that's including us; these are arguments that
@@ -42,6 +47,9 @@ for a in "$@"; do
 	elif [ "${NEXT}" = "json" ]; then
 		JSON="${a}"
 		NEXT=
+	elif [ "${NEXT}" = "flamegraph" ]; then
+		FLAMEGRAPH="${a}"
+		NEXT=
 	elif [ "${a}" = "-c" ] || [ "${a}" = "--cli" ]; then
 		NEXT="cli"
 	elif [[ "${a}" == "-c"* ]]; then
@@ -52,13 +60,19 @@ for a in "$@"; do
 		BASELINE_CLI="${a/-b/}"
 	elif [ "${a}" == "--output-style" ]; then
 		NEXT="output-style"
-	elif [ "${a}" = "-j" ] || [ "${a}" = "--json" ]; then
-		NEXT="json"
-	elif [[ "${a}" == "-j"* ]]; then
-		JSON="${a}"
 	elif [ "${a}" = "--show-output" ]; then
 		SHOW_OUTPUT=1
 		OUTPUT_STYLE=
+	elif [ "${a}" = "-j" ] || [ "${a}" = "--json" ]; then
+		NEXT="json"
+	elif [[ "${a}" == "-j"* ]]; then
+                JSON="${a/-j/}"
+	elif [ "${a}" = "-p" ] || [ "${a}" = "--profile" ]; then
+		PROFILE=1
+	elif [ "${a}" = "-F" ] || [ "${a}" = "--flamegraph" ]; then
+		NEXT="flamegraph"
+	elif [[ "${a}" == "-F"* ]]; then
+                FLAMEGRAPH="${a/-F/}"
 	else
                echo "$(basename "$0"): unknown option: ${a}" 1>&2
 		usage 1>&2
@@ -99,7 +113,7 @@ temp_dir() {
 	fi
 }

-create_preparescript() {
+create_prepare_script() {
 	# add some functions for users to use in preparation
 	cat >> "${SANDBOX_DIR}/prepare.sh" << EOF
 	set -e
@@ -205,6 +219,30 @@ create_preparescript() {
 		cp -R "\${RESOURCES_DIR}/\${RESOURCE}" "\${SANDBOX_DIR}/"
 	}

+	sandbox_resource() {
+		RESOURCE="\${1}"
+
+		if [ "\${RESOURCE}" = "" ]; then
+			echo "usage: sandbox_resource <path>" 1>&2
+			exit 1
+		fi
+
+		RESOURCE_UPPER=\$(echo "\${RESOURCE}" | tr '[:lower:]' '[:upper:]' | sed -e "s/-/_/g")
+		RESOURCE_PATH=\$(eval echo "\\\${BENCHMARK_\${RESOURCE_UPPER}_PATH}")
+
+		if [ "\${RESOURCE_PATH}" = "" -a "\${BENCHMARK_RESOURCES_PATH}" != "" ]; then
+			RESOURCE_PATH="\${BENCHMARK_RESOURCES_PATH}/\${RESOURCE}"
+		fi
+
+		if [ ! -f "\${RESOURCE_PATH}" ]; then
+			echo "sandbox: the resource \"\${RESOURCE}\" does not exist"
+			exit 1
+		fi
+
+		rm -rf "\${SANDBOX_DIR:?}/\${RESOURCE}"
+		cp -R "\${RESOURCE_PATH}" "\${SANDBOX_DIR}/\${RESOURCE}"
+	}
+
 	sandbox_repo() {
 		RESOURCE="\${1}"

@@ -229,8 +267,8 @@ create_preparescript() {
 			exit 1
 		fi

-		REPO_UPPER=\$(echo "\${1}" | tr '[:lower:]' '[:upper:]')
-		REPO_URL=\$(eval echo "\\\${BENCHMARK_\${REPO_UPPER}_REPOSITORY}")
+		REPO_UPPER=\$(echo "\${REPO}" | tr '[:lower:]' '[:upper:]')
+		REPO_URL=\$(eval echo "\\\${BENCHMARK_\${REPO_UPPER}_PATH}")

 		if [ "\${REPO_URL}" = "" ]; then
 			echo "\$0: unknown repository '\${REPO}'" 1>&2
@@ -252,10 +290,7 @@ EOF
 	echo "${SANDBOX_DIR}/prepare.sh"
 }

-create_runscript() {
-	SCRIPT_NAME="${1}"; shift
-	CLI_PATH="${1}"; shift
-
+start_dir() {
 	if [[ "${CHDIR}" = "/"* ]]; then
 		START_DIR="${CHDIR}"
 	elif [ "${CHDIR}" != "" ]; then
@@ -264,6 +299,15 @@ create_runscript() {
 		START_DIR="${SANDBOX_DIR}"
 	fi

+	echo "${START_DIR}"
+}
+
+create_run_script() {
+	SCRIPT_NAME="${1}"; shift
+	CLI_PATH="${1}"; shift
+
+	START_DIR=$(start_dir)
+
 	# our run script starts by chdir'ing to the sandbox or repository directory
 	echo -n "cd \"${START_DIR}\" && \"${CLI_PATH}\"" >> "${SANDBOX_DIR}/${SCRIPT_NAME}.sh"

@@ -271,16 +315,12 @@ create_runscript() {
 		echo -n " \"${a}\"" >> "${SANDBOX_DIR}/${SCRIPT_NAME}.sh"
 	done

+	echo "" >> "${SANDBOX_DIR}/${SCRIPT_NAME}.sh"
+
 	echo "${SANDBOX_DIR}/${SCRIPT_NAME}.sh"
 }

-gitbench_usage() { echo "usage: gitbench command..."; }
-
-#
-# this is the function that the outer script calls to actually do the sandboxing and
-# invocation of hyperfine.
-#
-gitbench() {
+parse_arguments() {
 	NEXT=

 	# this test should run the given command in preparation of the tests
@@ -336,34 +376,92 @@ gitbench() {
 		exit 1
 	fi

-	# sanity check
+	echo "PREPARE=\"${PREPARE}\""
+	echo "CHDIR=\"${CHDIR}\""
+	echo "WARMUP=\"${WARMUP}\""

-	for a in "${SANDBOX[@]}"; do
-		if [ ! -d "$(resources_dir)/${a}" ]; then
-			echo "$0: no resource '${a}' found" 1>&2
-			exit 1
-		fi
+	echo -n "GIT_ARGUMENTS=("
+
+	for arg in $@; do
+		echo -n " \"${arg}\""
 	done
+	echo " )"
+}

-	if [ "$REPOSITORY" != "" ]; then
-		if [ ! -d "$(resources_dir)/${REPOSITORY}" ]; then
-			echo "$0: no repository resource '${REPOSITORY}' found" 1>&2
-			exit 1
-		fi
+gitbench_usage() { echo "usage: gitbench command..."; }
+
+exec_profiler() {
+	if [ "${BASELINE_CLI}" != "" ]; then
+		echo "$0: baseline is not supported in profiling mode" 1>&2
+		exit 1
 	fi

-	# set up our sandboxing
+	if [ "${SHOW_OUTPUT}" != "" ]; then
+		echo "$0: show-output is not supported in profiling mode" 1>&2
+		exit 1
+	fi

-	SANDBOX_DIR="$(temp_dir)"
+	if [ "$JSON" != "" ]; then
+		echo "$0: json is not supported in profiling mode" 1>&2
+		exit 1
+	fi
+
+	SYSTEM=$(uname -s)
+
+	TEST_CLI_PATH=$(fullpath "${TEST_CLI}")
+	START_DIR=$(start_dir)
+
+	if [ "${SYSTEM}" = "Linux" ]; then
+		if [ "${OUTPUT_STYLE}" = "color" ]; then
+			COLOR_ARG="always"
+		elif [ "${OUTPUT_STYLE}" = "none" ]; then
+			COLOR_ARG="never"
+		elif [ "${OUTPUT_STYLE}" = "auto" ]; then
+			COLOR_ARG="auto"
+		else
+			echo "$0: unknown output-style option" 1>&2
+			exit 1
+		fi
+
+		bash "${PREPARE_SCRIPT}"
+		( cd "${START_DIR}" && perf record -F 999 -a -g -o "${SANDBOX_DIR}/perf.data" -- "${TEST_CLI_PATH}" "${GIT_ARGUMENTS[@]}" )
+
+		# we may not have samples if the process exited quickly
+		SAMPLES=$(perf report -D -i "${SANDBOX_DIR}/perf.data" | { grep "RECORD_SAMPLE" || test $? = 1; } | wc -l)
+
+		if [ "${SAMPLES}" = "0" ]; then
+			echo "$0: no profiling samples created" 1>&2
+			exit 3
+		fi
+
+		if [ "${FLAMEGRAPH}" = "" ]; then
+			perf report --stdio --stdio-color "${COLOR_ARG}" -i "${SANDBOX_DIR}/perf.data"
+		else
+			perf script -i "${SANDBOX_DIR}/perf.data" | "${BENCHMARK_DIR}/_script/flamegraph/stackcollapse-perf.pl" > "${SANDBOX_DIR}/perf.data.folded"
+			perl "${BENCHMARK_DIR}/_script/flamegraph/flamegraph.pl" "${SANDBOX_DIR}/perf.data.folded" > "${FLAMEGRAPH}"
+		fi
+	else
+		# macos - requires system integrity protection is disabled :(
+		# dtrace -s "bash ${TEST_RUN_SCRIPT}" -o filename -n "profile-997 /execname == \"${TEST_CLI}\"/ { @[ustack(100)] = count(); }"
+		echo "$0: profiling is not supported on ${SYSTEM}" 1>&2
+		exit 4
+	fi
+}
+
+exec_hyperfine() {
+	if [ "$FLAMEGRAPH" != "" ]; then
+		echo "$0: flamegraph is not supported in standard mode" 1>&2
+		exit 1
+	fi

 	if [ "${BASELINE_CLI}" != "" ]; then
 		BASELINE_CLI_PATH=$(fullpath "${BASELINE_CLI}")
-		BASELINE_RUN_SCRIPT=$(create_runscript "baseline" "${BASELINE_CLI_PATH}" "$@")
+		BASELINE_RUN_SCRIPT=$(create_run_script "baseline" "${BASELINE_CLI_PATH}" "${GIT_ARGUMENTS[@]}")
 	fi
-	TEST_CLI_PATH=$(fullpath "${TEST_CLI}")
-	TEST_RUN_SCRIPT=$(create_runscript "test" "${TEST_CLI_PATH}" "$@")

-	PREPARE_SCRIPT="$(create_preparescript)"
+	TEST_CLI_PATH=$(fullpath "${TEST_CLI}")
+	TEST_RUN_SCRIPT=$(create_run_script "test" "${TEST_CLI_PATH}" "${GIT_ARGUMENTS[@]}")
+
 	ARGUMENTS=("--prepare" "bash ${PREPARE_SCRIPT}" "--warmup" "${WARMUP}")

 	if [ "${OUTPUT_STYLE}" != "" ]; then
@@ -379,13 +477,42 @@ gitbench() {
 	fi

 	if [ "${BASELINE_CLI}" != "" ]; then
-		ARGUMENTS+=("-n" "${BASELINE_CLI} $*" "bash ${BASELINE_RUN_SCRIPT}")
+		ARGUMENTS+=("-n" "${BASELINE_CLI} ${GIT_ARGUMENTS[*]}" "bash ${BASELINE_RUN_SCRIPT}")
 	fi

-	ARGUMENTS+=("-n" "${TEST_CLI} $*" "bash ${TEST_RUN_SCRIPT}")
+	ARGUMENTS+=("-n" "${TEST_CLI} ${GIT_ARGUMENTS[*]}" "bash ${TEST_RUN_SCRIPT}")

 	hyperfine "${ARGUMENTS[@]}"
-	rm -rf "${SANDBOX_DIR:?}"
+}
+
+#
+# this is the function that the outer script calls to actually do the sandboxing and
+# invocation of hyperfine.
+#
+gitbench() {
+	eval $(parse_arguments "$@")
+
+	# sanity check
+
+	for a in "${SANDBOX[@]}"; do
+		if [ ! -d "$(resources_dir)/${a}" ]; then
+			echo "$0: no resource '${a}' found" 1>&2
+			exit 1
+		fi
+	done
+
+	# set up our sandboxing
+
+	SANDBOX_DIR="$(temp_dir)"
+	PREPARE_SCRIPT="$(create_prepare_script)"
+
+	if [ "${PROFILE}" != "" ]; then
+		exec_profiler
+	else
+		exec_hyperfine
+	fi
+
+#	rm -rf "${SANDBOX_DIR:?}"
 }

 # helper script to give useful error messages about configuration
@@ -397,17 +524,45 @@ needs_repo() {
 		exit 1
 	fi

-	REPO_UPPER=$(echo "${1}" | tr '[:lower:]' '[:upper:]')
-	REPO_URL=$(eval echo "\${BENCHMARK_${REPO_UPPER}_REPOSITORY}")
+	REPO_UPPER=$(echo "${REPO}" | tr '[:lower:]' '[:upper:]')
+	REPO_PATH=$(eval echo "\${BENCHMARK_${REPO_UPPER}_PATH}")
 	REPO_REMOTE_URL=$(eval echo "\${HELP_${REPO_UPPER}_REMOTE}")

-	if [ "${REPO_URL}" = "" ]; then
+	if [ "${REPO_PATH}" = "" ]; then
 		echo "$0: '${REPO}' repository not configured" 1>&2
 		echo "" 1>&2
 		echo "This benchmark needs an on-disk '${REPO}' repository. First, clone the" 1>&2
-		echo "remote repository ('${REPO_REMOTE_URL}') locally then set," 1>&2
-		echo "the 'BENCHMARK_${REPO_UPPER}_REPOSITORY' environment variable to the path that" 1>&2
+		echo "remote repository ('${REPO_REMOTE_URL}') locally then set" 1>&2
+		echo "the 'BENCHMARK_${REPO_UPPER}_PATH' environment variable to the path that" 1>&2
 		echo "contains the repository locally, then run this benchmark again." 1>&2
 		exit 2
 	fi
 }
+
+# helper script to give useful error messages about configuration
+needs_resource() {
+	RESOURCE="${1}"
+
+	if [ "${RESOURCE}" = "" ]; then
+		echo "usage: needs_resource <resource>" 1>&2
+		exit 1
+	fi
+
+	RESOURCE_UPPER=$(echo "${RESOURCE}" | tr '[:lower:]' '[:upper:]' | sed -e "s/-/_/g")
+	RESOURCE_PATH=$(eval echo "\${BENCHMARK_${RESOURCE_UPPER}_PATH}")
+
+	if [ "${RESOURCE_PATH}" = "" -a "${BENCHMARK_RESOURCES_PATH}" != "" ]; then
+		RESOURCE_PATH="${BENCHMARK_RESOURCES_PATH}/${RESOURCE}"
+	fi
+
+	if [ "${RESOURCE_PATH}" = "" ]; then
+		echo "$0: '${RESOURCE}' resource path not configured" 1>&2
+		echo "" 1>&2
+		echo "This benchmark needs an on-disk resource named '${RESOURCE}'." 1>&2
+		echo "First, clone the additional benchmark resources locally (from" 1>&2
+		echo "'${HELP_RESOURCE_REPO}'), then set the" 1>& 2
+		echo "'BENCHMARK_RESOURCES_PATH' environment variable to the path that" 1>&2
+		echo "contains the resources locally, then run this benchmark again." 1>&2
+		exit 2
+	fi
+}
--- a/tests/benchmarks/indexpack__250mb
+++ b/tests/benchmarks/indexpack__250mb
@@ -0,0 +1,11 @@
+#!/bin/bash -e
+
+. "$(dirname "$0")/benchmark_helpers.sh"
+
+needs_resource packfile-250mb
+
+gitbench --prepare "git init --bare dest.git && sandbox_resource packfile-250mb && mv packfile-250mb dest.git/packfile-250mb.pack" \
+         --warmup 5 \
+         --chdir "dest.git" \
+	 -- \
+	 index-pack packfile-250mb.pack