pkg/zbench

Files

Aleksey Shakhmatov 3f52ff1eeb docs: add README with usage examples

Covers setup (build.zig.zon + zbench_build helper), the minimal
benchmark shape, the Benchmark API surface, timer control,
sub-benchmarks, comptime-parametric generation, CLI flags, JSON
output, and the optimizer pitfall around trivial loops.

2026-05-21 08:22:50 +03:00

8.7 KiB

Raw Blame History

zbench

Go-style benchmarking for Zig 0.16+. No external dependencies. Adaptive iteration count, per-op allocation tracking, throughput (MB/s), sub-benchmarks, statistical repetition, and text or JSON output.

benchmark                           iters        ns/op       B/op  allocs/op         MB/s
append_u8                       300000000         0.52          1          0
sha256_64                         3000000        53.62                            1138.37
hash_sizes/sha256_16              5000000        32.55                             468.81
hash_sizes/sha256_256             1000000       134.26                            1818.43
hash_sizes/sha256_4096             100000      1778.86                            2195.92

Requirements

Zig 0.16.0 or newer.

Adding the dependency

In your project's build.zig.zon:

.dependencies = .{
    .zbench = .{
        .url = "https://example.com/zbench-0.1.0.tar.gz",
        .hash = "...",
    },
},

Or fetch from a local path during development:

zig fetch --save=zbench ../path/to/zbench

Wiring it up in `build.zig`

The library exposes two modules: zbench (the runtime API) and zbench_build (a small build helper). A typical consumer build.zig:

const std = @import("std");
const zbench_build = @import("zbench_build");

/// Consumer build script: registers a `bench` step that builds and runs
/// `bench/main.zig` against the zbench module.
pub fn build(b: *std.Build) void {
    const resolved_target = b.standardTargetOptions(.{});

    // zbench is always requested in ReleaseFast here; this script does not
    // expose an optimize option of its own.
    const zbench_dep = b.dependency("zbench", .{
        .target = resolved_target,
        .optimize = .ReleaseFast,
    });
    const zbench_mod = zbench_dep.module("zbench");

    // The helper's return value is not needed in this example.
    _ = zbench_build.add_bench_step(b, .{
        .step_name = "bench",
        .root = b.path("bench/main.zig"),
        .target = resolved_target,
        .zbench = zbench_mod,
    });
}

Now zig build bench compiles and runs the benchmark executable, and zig build bench -- --min-time=200ms --count=3 forwards CLI arguments through to it.

If you prefer not to use the helper, you can build the executable yourself and wire it up like any other b.addExecutable step — see examples/bench/build.zig for the explicit version.

Minimal benchmark

const std = @import("std");
const zbench = @import("zbench");

/// Entry point: registers the benchmarks on a suite and hands control to the
/// zbench command-line runner.
pub fn main(init: std.process.Init) !void {
    // The suite takes its allocator and I/O handle from the process init value.
    var suite = zbench.Suite.init(init.gpa, init.io);
    defer suite.deinit();

    // Registration order: "append" first, then "hash".
    try suite.add("append", bench_append);
    try suite.add("hash", bench_hash);

    return suite.run_cli(init);
}

/// Benchmarks appending one byte per iteration to a growing list, allocating
/// through `b.allocator`.
fn bench_append(b: *zbench.Benchmark) !void {
    const gpa = b.allocator;

    var bytes: std.ArrayListUnmanaged(u8) = .empty;
    defer bytes.deinit(gpa);

    b.reset_timer();
    var iter: u64 = 0;
    while (iter < b.n) : (iter += 1) {
        // Low eight bits of the counter; always fits in a u8.
        const byte: u8 = @truncate(iter);
        try bytes.append(gpa, byte);
    }
    // Hand the final contents to keep() once the loop is done.
    b.keep(bytes.items);
}

/// Benchmarks SHA-256 over a fixed 64-byte input and declares 64 bytes per
/// operation for the throughput column.
fn bench_hash(b: *zbench.Benchmark) !void {
    const Sha256 = std.crypto.hash.sha2.Sha256;

    var input: [64]u8 = @splat(0xab);
    var digest: [Sha256.digest_length]u8 = undefined;

    b.reset_timer();
    var iter: u64 = 0;
    while (iter < b.n) : (iter += 1) {
        Sha256.hash(&input, &digest, .{});
        // Pass the digest to keep() every iteration.
        b.keep(digest);
    }
    b.set_bytes(input.len);
}

The mental model matches Go's testing.B: the runner calls your function with an increasing b.n until the wall time crosses --min-time. You write the loop, the framework picks how many times to run it.

The `Benchmark` API

b.n — target iteration count for the current attempt.
b.allocator — a wrapping allocator that counts allocations; use it if you want B/op and allocs/op reported.
b.io — std.Io for benchmarks that need to perform I/O.
b.reset_timer() — call after setup, before the measured loop.
b.stop_timer() / b.start_timer() — exclude per-iteration setup from the measurement.
b.set_bytes(bytes_per_op) — declare throughput; the reporter shows MB/s.
b.report_allocs() — force the B/op / allocs/op columns for this benchmark, regardless of --allocs.
b.keep(value) — optimization barrier; marks a computed value as used so ReleaseFast does not delete the work that produced it.
b.run(name, fn) — run a sub-benchmark. The parent function becomes a container and is not itself reported; the sub-benchmark is reported as parent/name.

Excluding setup from the measurement

/// Benchmarks hash-map lookups. The map is filled with 1000 entries before
/// `reset_timer`, so only the lookup loop is measured.
fn bench_lookup(b: *zbench.Benchmark) !void {
    const entry_count = 1000;

    var table: std.AutoHashMapUnmanaged(u64, u64) = .empty;
    defer table.deinit(b.allocator);
    for (0..entry_count) |key| {
        try table.put(b.allocator, key, key *% 31);
    }

    b.reset_timer();
    var total: u64 = 0;
    var iter: u64 = 0;
    while (iter < b.n) : (iter += 1) {
        // Keys cycle through 0..999; a miss simply moves on to the next iteration.
        const found = table.get(iter % entry_count) orelse continue;
        total +%= found;
    }
    b.keep(total);
}

Per-iteration setup that should not be measured:

while (i < b.n) : (i += 1) {
    // Build the input with the timer paused so setup is not measured.
    b.stop_timer();
    const input = try generate_input(b.allocator);
    b.start_timer();

    // Pass the result to keep() so the optimizer cannot drop the call.
    b.keep(process(input));

    // Free with the timer paused as well. A plain `defer` would run at the
    // end of the loop body, after start_timer(), and so be measured.
    b.stop_timer();
    b.allocator.free(input);
    b.start_timer();
}

Sub-benchmarks

A parent function delegates to one or more sub-benchmarks via b.run. Each sub-benchmark is run as a fresh adaptive attempt and reported on its own line:

/// Container benchmark: has no measured loop of its own and delegates to three
/// SHA-256 sub-benchmarks, one per input size (16, 256 and 4096 bytes).
fn bench_hash_sizes(b: *zbench.Benchmark) !void {
    try b.run("sha256_16",   gen_sha256(16));
    try b.run("sha256_256",  gen_sha256(256));
    try b.run("sha256_4096", gen_sha256(4096));
}

/// Builds a benchmark function that hashes a `size`-byte buffer with SHA-256
/// once per iteration and declares `size` bytes per operation.
fn gen_sha256(comptime size: usize) zbench.BenchFn {
    const Generated = struct {
        fn bench(b: *zbench.Benchmark) !void {
            const Sha256 = std.crypto.hash.sha2.Sha256;

            var input: [size]u8 = @splat(0xcd);
            var digest: [Sha256.digest_length]u8 = undefined;

            b.reset_timer();
            var iter: u64 = 0;
            while (iter < b.n) : (iter += 1) {
                Sha256.hash(&input, &digest, .{});
                b.keep(digest);
            }
            b.set_bytes(size);
        }
    };
    return Generated.bench;
}

Output (with the parent registered under the name hash_sizes):

hash_sizes/sha256_16              5000000        32.55                             468.81
hash_sizes/sha256_256             1000000       134.26                            1818.43
hash_sizes/sha256_4096             100000      1778.86                            2195.92

Comptime-parametric benchmarks

Zig has no closures, so parameterize at compile time and register one benchmark per value:

// Registers one benchmark per size: memset_16, memset_256 and memset_4096.
inline for (.{ 16, 256, 4096 }) |size| {
    const bench_name = std.fmt.comptimePrint("memset_{d}", .{size});
    try suite.add(bench_name, gen_bench_memset(size));
}

/// Builds a benchmark function that fills a `size`-byte buffer once per
/// iteration and declares `size` bytes per operation.
fn gen_bench_memset(comptime size: usize) zbench.BenchFn {
    const Generated = struct {
        fn bench(b: *zbench.Benchmark) !void {
            var scratch: [size]u8 = undefined;

            b.reset_timer();
            var iter: u64 = 0;
            while (iter < b.n) : (iter += 1) {
                // The fill byte is the low eight bits of the counter.
                const fill: u8 = @truncate(iter);
                @memset(&scratch, fill);
                b.keep(scratch);
            }
            b.set_bytes(size);
        }
    };
    return Generated.bench;
}

CLI flags

Flag	Default	Meaning
`--filter=<substring>`	none	Run only benchmarks whose name contains the substring. Use `parent/leaf` to target a specific sub-bench.
`--min-time=<dur>`	`1s`	Minimum wall time per benchmark. Accepts `s`, `ms`, `us`, `ns`.
`--count=<n>`	`1`	Repeat each benchmark `n` times; output includes `mean ± stddev` (text) or `samples` (JSON).
`--max-iters=<n>`	`1_000_000_000`	Hard cap on iterations per attempt.
`--allocs`	off	Always print `B/op` and `allocs/op` columns.
`--format=text|json`	`text`	Output format. JSON is ndjson — one object per line.
`--list`	—	Print names of all registered benchmarks and exit.
`--help`, `-h`	—	Print help.

JSON output

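--format=json emits one ndjson record per benchmark; when --count is greater than 1, the repetitions are summarized in that single record (count, mean, stddev, min, and the individual samples):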

{"name":"sha256_64","n":1000000,"ns_per_op":53.63,"bytes_per_op":0,"allocs_per_op":0,"mb_per_sec":1133.49,"count":3,"ns_per_op_mean":53.63,"ns_per_op_stddev":0.26,"ns_per_op_min":53.35,"samples":[53.71,53.35,53.85]}

Suitable for piping into a comparison tool, persisting in CI, or plotting.

A note on the optimizer

zig build defaults the benchmark executable to ReleaseFast. In that mode the compiler will delete trivial work whose result is never used. If you see 0.00 ns/op for a hot loop, that's the signal — pass the result to b.keep(value) inside the loop:

var acc: u64 = 0;
var iter: u64 = 0;
while (iter < b.n) : (iter += 1) {
    acc +%= iter *% 31;
    // Handing the running total to keep() on every pass forces the optimizer to keep the work.
    b.keep(acc);
}

Idioms

Pass init.gpa and init.io from main straight into the suite — zbench does not reach for global state.
The allocator exposed by b.allocator wraps your gpa for accounting; use it as you would any allocator.
Benchmark methods use snake_case to match the project's house style (reset_timer, set_bytes, …). Calls into std keep their upstream casing.

Running the bundled example

From the repo root:

zig build example -- --min-time=100ms
zig build example -- --filter=hash_sizes --count=3 --format=json

License

MIT — see LICENSE if present, or treat this as your project's default license terms until one is added.

8.7 KiB Raw Blame History