Covers setup (build.zig.zon + zbench_build helper), the minimal benchmark shape, the Benchmark API surface, timer control, sub-benchmarks, comptime-parametric generation, CLI flags, JSON output, and the optimizer pitfall around trivial loops.
zbench
Go-style benchmarking for Zig 0.16+. No external dependencies. Adaptive iteration count, per-op allocation tracking, throughput (MB/s), sub-benchmarks, statistical repetition, and text or JSON output.
benchmark                    iters    ns/op   B/op  allocs/op     MB/s
append_u8                300000000     0.52      1          0
sha256_64                  3000000    53.62                    1138.37
hash_sizes/sha256_16       5000000    32.55                     468.81
hash_sizes/sha256_256      1000000   134.26                    1818.43
hash_sizes/sha256_4096      100000  1778.86                    2195.92
Requirements
- Zig 0.16.0 or newer.
Adding the dependency
In your project's build.zig.zon:
.dependencies = .{
.zbench = .{
.url = "https://example.com/zbench-0.1.0.tar.gz",
.hash = "...",
},
},
Or fetch from a local path during development:
zig fetch --save=zbench ../path/to/zbench
Wiring it up in build.zig
The library exposes two modules: zbench (the runtime API) and
zbench_build (a small build helper). A typical consumer build.zig:
const std = @import("std");
const zbench_build = @import("zbench_build");
/// Build script entry point: registers a `bench` step for bench/main.zig,
/// wired to the zbench runtime module.
pub fn build(b: *std.Build) void {
    const target = b.standardTargetOptions(.{});

    // Resolve the zbench package for the selected target, built with ReleaseFast.
    const zbench_dep = b.dependency("zbench", .{
        .target = target,
        .optimize = .ReleaseFast,
    });
    const zbench_module = zbench_dep.module("zbench");

    // The helper's return value is not needed here, so it is discarded.
    _ = zbench_build.add_bench_step(b, .{
        .step_name = "bench",
        .root = b.path("bench/main.zig"),
        .target = target,
        .zbench = zbench_module,
    });
}
Now zig build bench compiles and runs the benchmark executable, and
zig build bench -- --min-time=200ms --count=3 forwards CLI arguments
through to it.
If you prefer not to use the helper, you can build the executable
yourself and wire it up like any other b.addExecutable step — see
examples/bench/build.zig for the explicit version.
Minimal benchmark
const std = @import("std");
const zbench = @import("zbench");
// Program entry point: creates a suite, registers two benchmarks on it, and runs it through run_cli.
pub fn main(init: std.process.Init) !void {
// The suite is built from the allocator and Io that main receives.
var suite = zbench.Suite.init(init.gpa, init.io);
defer suite.deinit();
// Register each benchmark function under a name.
try suite.add("append", bench_append);
try suite.add("hash", bench_hash);
// NOTE(review): run_cli is handed `init`; presumably it reads the command-line flags from it — confirm against zbench.
try suite.run_cli(init);
}
/// Benchmark: appends one byte per iteration to a growing list allocated
/// through the benchmark's allocator.
fn bench_append(b: *zbench.Benchmark) !void {
    var bytes: std.ArrayListUnmanaged(u8) = .empty;
    defer bytes.deinit(b.allocator);

    // Everything above is setup; start measuring from here.
    b.reset_timer();

    var iter: u64 = 0;
    while (iter < b.n) {
        // Low eight bits of the iteration counter.
        const low_byte: u8 = @truncate(iter);
        try bytes.append(b.allocator, low_byte);
        iter += 1;
    }

    // Hand the result to keep() so the appends are not optimized away.
    b.keep(bytes.items);
}
/// Benchmark: SHA-256 over a fixed 64-byte buffer, once per iteration,
/// with the per-op byte count declared for throughput reporting.
fn bench_hash(b: *zbench.Benchmark) !void {
    const Sha256 = std.crypto.hash.sha2.Sha256;

    var input: [64]u8 = @splat(0xab);
    var digest: [32]u8 = undefined;

    b.reset_timer();

    var iter: u64 = 0;
    while (iter < b.n) : (iter += 1) {
        Sha256.hash(&input, &digest, .{});
        // Keep each digest alive so the hash call is not removed.
        b.keep(digest);
    }

    // Bytes processed per operation.
    b.set_bytes(input.len);
}
The mental model matches Go's testing.B: the runner calls your
function with an increasing b.n until the wall time crosses
--min-time. You write the loop, the framework picks how many times
to run it.
The Benchmark API
- `b.n` — target iteration count for the current attempt.
- `b.allocator` — a wrapping allocator that counts allocations; use it if you want `B/op` and `allocs/op` reported.
- `b.io` — `std.Io` for benchmarks that need to perform I/O.
- `b.reset_timer()` — call after setup, before the measured loop.
- `b.stop_timer()` / `b.start_timer()` — exclude per-iteration setup from the measurement.
- `b.set_bytes(bytes_per_op)` — declare throughput; the reporter shows `MB/s`.
- `b.report_allocs()` — force the `B/op` / `allocs/op` columns for this benchmark, regardless of `--allocs`.
- `b.keep(value)` — optimization barrier; keeps a computed value alive past the loop so ReleaseFast does not delete the work.
- `b.run(name, fn)` — run a sub-benchmark. The parent function becomes a container and is not itself reported; the sub-benchmark is reported as `parent/name`.
Excluding setup from the measurement
/// Benchmark: hash-map lookups against a table of 1000 entries that is
/// populated before the timer is reset.
fn bench_lookup(b: *zbench.Benchmark) !void {
    const entry_count = 1000;

    var table: std.AutoHashMapUnmanaged(u64, u64) = .empty;
    defer table.deinit(b.allocator);

    // Setup: fill the table. The reset below keeps this out of the measurement.
    for (0..entry_count) |key| {
        try table.put(b.allocator, key, key *% 31);
    }
    b.reset_timer();

    var hits: u64 = 0;
    var iter: u64 = 0;
    while (iter < b.n) : (iter += 1) {
        const found = table.get(iter % entry_count) orelse continue;
        hits +%= found;
    }

    // Keep the accumulated value so the lookups are not optimized away.
    b.keep(hits);
}
Per-iteration setup that should not be measured:
while (i < b.n) : (i += 1) {
    // Untimed: build this iteration's input.
    b.stop_timer();
    const input = try generate_input(b.allocator);
    b.start_timer();

    // Timed: the work being measured.
    _ = process(input);

    // Untimed: release the input. A `defer` here would run at the end of the
    // loop body, after start_timer(), so the free would be measured too.
    b.stop_timer();
    b.allocator.free(input);
    b.start_timer();
}
Sub-benchmarks
A parent function delegates to one or more sub-benchmarks via
b.run. Each sub is run as a fresh adaptive attempt and reported
on its own line:
/// Container benchmark: runs one SHA-256 sub-benchmark per input size.
fn bench_hash_sizes(b: *zbench.Benchmark) !void {
    // Produces the sub-benchmark names sha256_16, sha256_256 and sha256_4096, in that order.
    inline for (.{ 16, 256, 4096 }) |size| {
        try b.run(std.fmt.comptimePrint("sha256_{d}", .{size}), gen_sha256(size));
    }
}
/// Returns a benchmark function that hashes a `size`-byte buffer with SHA-256
/// once per iteration and declares `size` bytes per operation.
fn gen_sha256(comptime size: usize) zbench.BenchFn {
    const Impl = struct {
        fn run(b: *zbench.Benchmark) !void {
            var input: [size]u8 = @splat(0xcd);
            var digest: [32]u8 = undefined;

            b.reset_timer();

            var iter: u64 = 0;
            while (iter < b.n) : (iter += 1) {
                std.crypto.hash.sha2.Sha256.hash(&input, &digest, .{});
                // Keep each digest alive so the hash call is not removed.
                b.keep(digest);
            }

            b.set_bytes(size);
        }
    };
    return Impl.run;
}
Output:
hash_sizes/sha256_16 5000000 32.55 468.81
hash_sizes/sha256_256 1000000 134.26 1818.43
hash_sizes/sha256_4096 100000 1778.86 2195.92
Comptime-parametric benchmarks
Zig has no closures, so parameterize at compile time and register one benchmark per value:
// Register one memset benchmark per buffer size; each name is formed at compile time.
inline for (.{ 16, 256, 4096 }) |size| {
    const name = std.fmt.comptimePrint("memset_{d}", .{size});
    try suite.add(name, gen_bench_memset(size));
}
/// Returns a benchmark function that fills a `size`-byte buffer once per
/// iteration and declares `size` bytes per operation.
fn gen_bench_memset(comptime size: usize) zbench.BenchFn {
    const Impl = struct {
        fn run(b: *zbench.Benchmark) !void {
            var scratch: [size]u8 = undefined;

            b.reset_timer();

            var iter: u64 = 0;
            while (iter < b.n) : (iter += 1) {
                // Fill value is the low eight bits of the iteration counter.
                const fill: u8 = @truncate(iter);
                @memset(&scratch, fill);
                b.keep(scratch);
            }

            b.set_bytes(size);
        }
    };
    return Impl.run;
}
CLI flags
| Flag | Default | Meaning |
|---|---|---|
| `--filter=<substring>` | none | Run only benchmarks whose name contains the substring. Use `parent/leaf` to target a specific sub-bench. |
| `--min-time=<dur>` | `1s` | Minimum wall time per benchmark. Accepts `s`, `ms`, `us`, `ns`. |
| `--count=<n>` | `1` | Repeat each benchmark `n` times; output includes mean ± stddev (text) or samples (JSON). |
| `--max-iters=<n>` | `1_000_000_000` | Hard cap on iterations per attempt. |
| `--allocs` | off | Always print `B/op` and `allocs/op` columns. |
| `--format=text\|json` | `text` | Output format. JSON is ndjson — one object per line. |
| `--list` | — | Print names of all registered benchmarks and exit. |
| `--help`, `-h` | — | Print help. |
JSON output
--format=json emits one ndjson record per benchmark group:
{"name":"sha256_64","n":1000000,"ns_per_op":53.63,"bytes_per_op":0,"allocs_per_op":0,"mb_per_sec":1133.49,"count":3,"ns_per_op_mean":53.63,"ns_per_op_stddev":0.26,"ns_per_op_min":53.35,"samples":[53.71,53.35,53.85]}
Suitable for piping into a comparison tool, persisting in CI, or plotting.
A note on the optimizer
zig build defaults the benchmark executable to ReleaseFast. In that
mode the compiler will delete trivial work whose result is never used.
If you see 0.00 ns/op for a hot loop, that's the signal — wrap the
result inside the loop with b.keep(value):
var acc: u64 = 0;
var iter: u64 = 0;
while (iter < b.n) : (iter += 1) {
    acc +%= iter *% 31;
    // Passing the running total to keep() forces the optimizer to keep the work.
    b.keep(acc);
}
Idioms
- Pass `init.gpa` and `init.io` from `main` straight into the suite — zbench does not reach for global state.
- The allocator exposed by `b.allocator` wraps your `gpa` for accounting; use it as you would any allocator.
- `Benchmark` methods use snake_case to match the project's house style (`reset_timer`, `set_bytes`, …). Calls into `std` keep their upstream casing.
Running the bundled example
From the repo root:
zig build example -- --min-time=100ms
zig build example -- --filter=hash_sizes --count=3 --format=json
License
MIT — see LICENSE if present, or treat this as your project's
default license terms until one is added.