docs: add README with usage examples
Covers setup (build.zig.zon + zbench_build helper), the minimal benchmark shape, the Benchmark API surface, timer control, sub-benchmarks, comptime-parametric generation, CLI flags, JSON output, and the optimizer pitfall around trivial loops.
This commit is contained in:
299
README.md
Normal file
299
README.md
Normal file
@@ -0,0 +1,299 @@
|
|||||||
|
# zbench
|
||||||
|
|
||||||
|
Go-style benchmarking for Zig 0.16+. No external dependencies. Adaptive
|
||||||
|
iteration count, per-op allocation tracking, throughput (MB/s),
|
||||||
|
sub-benchmarks, statistical repetition, and text or JSON output.
|
||||||
|
|
||||||
|
```
|
||||||
|
benchmark                    iters     ns/op    B/op  allocs/op      MB/s
|
||||||
|
append_u8                300000000      0.52       1          0
|
||||||
|
sha256_64                  3000000     53.62                      1138.37
|
||||||
|
hash_sizes/sha256_16       5000000     32.55                       468.81
|
||||||
|
hash_sizes/sha256_256      1000000    134.26                      1818.43
|
||||||
|
hash_sizes/sha256_4096      100000   1778.86                      2195.92
|
||||||
|
```
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
- Zig 0.16.0 or newer.
|
||||||
|
|
||||||
|
## Adding the dependency
|
||||||
|
|
||||||
|
In your project's `build.zig.zon`:
|
||||||
|
|
||||||
|
```zig
|
||||||
|
.dependencies = .{
|
||||||
|
.zbench = .{
|
||||||
|
.url = "https://example.com/zbench-0.1.0.tar.gz",
|
||||||
|
.hash = "...",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
```
|
||||||
|
|
||||||
|
Or fetch from a local path during development:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
zig fetch --save=zbench ../path/to/zbench
|
||||||
|
```
|
||||||
|
|
||||||
|
## Wiring it up in `build.zig`
|
||||||
|
|
||||||
|
The library exposes two modules: `zbench` (the runtime API) and
|
||||||
|
`zbench_build` (a small build helper). A typical consumer `build.zig`:
|
||||||
|
|
||||||
|
```zig
|
||||||
|
const std = @import("std");
|
||||||
|
const zbench_build = @import("zbench_build");
|
||||||
|
|
||||||
|
pub fn build(b: *std.Build) void {
|
||||||
|
const target = b.standardTargetOptions(.{});
|
||||||
|
|
||||||
|
const zbench = b.dependency("zbench", .{
|
||||||
|
.target = target,
|
||||||
|
.optimize = .ReleaseFast,
|
||||||
|
});
|
||||||
|
|
||||||
|
_ = zbench_build.add_bench_step(b, .{
|
||||||
|
.step_name = "bench",
|
||||||
|
.root = b.path("bench/main.zig"),
|
||||||
|
.target = target,
|
||||||
|
.zbench = zbench.module("zbench"),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Now `zig build bench` compiles and runs the benchmark executable, and
|
||||||
|
`zig build bench -- --min-time=200ms --count=3` forwards CLI arguments
|
||||||
|
through to it.
|
||||||
|
|
||||||
|
If you prefer not to use the helper, you can build the executable
|
||||||
|
yourself and wire it up like any other `b.addExecutable` step — see
|
||||||
|
`examples/bench/build.zig` for the explicit version.
|
||||||
|
|
||||||
|
## Minimal benchmark
|
||||||
|
|
||||||
|
```zig
|
||||||
|
const std = @import("std");
|
||||||
|
const zbench = @import("zbench");
|
||||||
|
|
||||||
|
pub fn main(init: std.process.Init) !void {
|
||||||
|
var suite = zbench.Suite.init(init.gpa, init.io);
|
||||||
|
defer suite.deinit();
|
||||||
|
|
||||||
|
try suite.add("append", bench_append);
|
||||||
|
try suite.add("hash", bench_hash);
|
||||||
|
|
||||||
|
try suite.run_cli(init);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn bench_append(b: *zbench.Benchmark) !void {
|
||||||
|
var list: std.ArrayListUnmanaged(u8) = .empty;
|
||||||
|
defer list.deinit(b.allocator);
|
||||||
|
|
||||||
|
b.reset_timer();
|
||||||
|
var i: u64 = 0;
|
||||||
|
while (i < b.n) : (i += 1) {
|
||||||
|
try list.append(b.allocator, @intCast(i & 0xff));
|
||||||
|
}
|
||||||
|
b.keep(list.items);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn bench_hash(b: *zbench.Benchmark) !void {
|
||||||
|
var buf: [64]u8 = @splat(0xab);
|
||||||
|
var out: [32]u8 = undefined;
|
||||||
|
|
||||||
|
b.reset_timer();
|
||||||
|
var i: u64 = 0;
|
||||||
|
while (i < b.n) : (i += 1) {
|
||||||
|
std.crypto.hash.sha2.Sha256.hash(&buf, &out, .{});
|
||||||
|
b.keep(out);
|
||||||
|
}
|
||||||
|
b.set_bytes(buf.len);
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
The mental model matches Go's `testing.B`: the runner calls your
|
||||||
|
function with an increasing `b.n` until the wall time crosses
|
||||||
|
`--min-time`. You write the loop; the framework picks how many times
|
||||||
|
to run it.
|
||||||
|
|
||||||
|
## The `Benchmark` API
|
||||||
|
|
||||||
|
- `b.n` — target iteration count for the current attempt.
|
||||||
|
- `b.allocator` — a wrapping allocator that counts allocations; use it
|
||||||
|
if you want `B/op` and `allocs/op` reported.
|
||||||
|
- `b.io` — `std.Io` for benchmarks that need to perform I/O.
|
||||||
|
- `b.reset_timer()` — call after setup, before the measured loop.
|
||||||
|
- `b.stop_timer()` / `b.start_timer()` — exclude per-iteration setup
|
||||||
|
from the measurement.
|
||||||
|
- `b.set_bytes(bytes_per_op)` — declare throughput; the reporter shows
|
||||||
|
`MB/s`.
|
||||||
|
- `b.report_allocs()` — force the `B/op` / `allocs/op` columns for
|
||||||
|
this benchmark, regardless of `--allocs`.
|
||||||
|
- `b.keep(value)` — optimization barrier; keeps a computed value alive
|
||||||
|
past the loop so ReleaseFast does not delete the work.
|
||||||
|
- `b.run(name, fn)` — run a sub-benchmark. The parent function becomes
|
||||||
|
a container and is not itself reported; the sub-benchmark is
|
||||||
|
reported as `parent/name`.
|
||||||
|
|
||||||
|
## Excluding setup from the measurement
|
||||||
|
|
||||||
|
```zig
|
||||||
|
fn bench_lookup(b: *zbench.Benchmark) !void {
|
||||||
|
var map: std.AutoHashMapUnmanaged(u64, u64) = .empty;
|
||||||
|
defer map.deinit(b.allocator);
|
||||||
|
for (0..1000) |k| try map.put(b.allocator, k, k *% 31);
|
||||||
|
|
||||||
|
b.reset_timer();
|
||||||
|
var hits: u64 = 0;
|
||||||
|
var i: u64 = 0;
|
||||||
|
while (i < b.n) : (i += 1) {
|
||||||
|
if (map.get(i % 1000)) |v| hits +%= v;
|
||||||
|
}
|
||||||
|
b.keep(hits);
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
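Per-iteration setup that should not be measured (note that the deferred `free` below runs at the end of each iteration, after `b.start_timer()`, so it is still timed):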
|
||||||
|
|
||||||
|
```zig
|
||||||
|
while (i < b.n) : (i += 1) {
|
||||||
|
b.stop_timer();
|
||||||
|
const input = try generate_input(b.allocator);
|
||||||
|
defer b.allocator.free(input);
|
||||||
|
b.start_timer();
|
||||||
|
|
||||||
|
_ = process(input);
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Sub-benchmarks
|
||||||
|
|
||||||
|
A parent function delegates to one or more sub-benchmarks via
|
||||||
|
`b.run`. Each sub-benchmark is run as a fresh adaptive attempt and reported
|
||||||
|
on its own line:
|
||||||
|
|
||||||
|
```zig
|
||||||
|
fn bench_hash_sizes(b: *zbench.Benchmark) !void {
|
||||||
|
try b.run("sha256_16", gen_sha256(16));
|
||||||
|
try b.run("sha256_256", gen_sha256(256));
|
||||||
|
try b.run("sha256_4096", gen_sha256(4096));
|
||||||
|
}
|
||||||
|
|
||||||
|
fn gen_sha256(comptime size: usize) zbench.BenchFn {
|
||||||
|
return struct {
|
||||||
|
fn run(b: *zbench.Benchmark) !void {
|
||||||
|
var buf: [size]u8 = @splat(0xcd);
|
||||||
|
var out: [32]u8 = undefined;
|
||||||
|
b.reset_timer();
|
||||||
|
var i: u64 = 0;
|
||||||
|
while (i < b.n) : (i += 1) {
|
||||||
|
std.crypto.hash.sha2.Sha256.hash(&buf, &out, .{});
|
||||||
|
b.keep(out);
|
||||||
|
}
|
||||||
|
b.set_bytes(size);
|
||||||
|
}
|
||||||
|
}.run;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Output:
|
||||||
|
|
||||||
|
```
|
||||||
|
hash_sizes/sha256_16 5000000 32.55 468.81
|
||||||
|
hash_sizes/sha256_256 1000000 134.26 1818.43
|
||||||
|
hash_sizes/sha256_4096 100000 1778.86 2195.92
|
||||||
|
```
|
||||||
|
|
||||||
|
## Comptime-parametric benchmarks
|
||||||
|
|
||||||
|
Zig has no closures, so parameterize at compile time and register one
|
||||||
|
benchmark per value:
|
||||||
|
|
||||||
|
```zig
|
||||||
|
inline for (.{ 16, 256, 4096 }) |size| {
|
||||||
|
try suite.add(
|
||||||
|
std.fmt.comptimePrint("memset_{d}", .{size}),
|
||||||
|
gen_bench_memset(size),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn gen_bench_memset(comptime size: usize) zbench.BenchFn {
|
||||||
|
return struct {
|
||||||
|
fn run(b: *zbench.Benchmark) !void {
|
||||||
|
var buf: [size]u8 = undefined;
|
||||||
|
b.reset_timer();
|
||||||
|
var i: u64 = 0;
|
||||||
|
while (i < b.n) : (i += 1) {
|
||||||
|
@memset(&buf, @intCast(i & 0xff));
|
||||||
|
b.keep(buf);
|
||||||
|
}
|
||||||
|
b.set_bytes(size);
|
||||||
|
}
|
||||||
|
}.run;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## CLI flags
|
||||||
|
|
||||||
|
| Flag | Default | Meaning |
|
||||||
|
|---|---|---|
|
||||||
|
| `--filter=<substring>` | _none_ | Run only benchmarks whose name contains the substring. Use `parent/leaf` to target a specific sub-bench. |
|
||||||
|
| `--min-time=<dur>` | `1s` | Minimum wall time per benchmark. Accepts `s`, `ms`, `us`, `ns`. |
|
||||||
|
| `--count=<n>` | `1` | Repeat each benchmark `n` times; output includes `mean ± stddev` (text) or `samples` (JSON). |
|
||||||
|
| `--max-iters=<n>` | `1_000_000_000` | Hard cap on iterations per attempt. |
|
||||||
|
| `--allocs` | off | Always print `B/op` and `allocs/op` columns. |
|
||||||
|
| `--format=text\|json` | `text` | Output format. JSON is ndjson — one object per line. |
|
||||||
|
| `--list` | — | Print names of all registered benchmarks and exit. |
|
||||||
|
| `--help`, `-h` | — | Print help. |
|
||||||
|
|
||||||
|
## JSON output
|
||||||
|
|
||||||
|
`--format=json` emits one ndjson record per benchmark group:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{"name":"sha256_64","n":1000000,"ns_per_op":53.63,"bytes_per_op":0,"allocs_per_op":0,"mb_per_sec":1133.49,"count":3,"ns_per_op_mean":53.63,"ns_per_op_stddev":0.26,"ns_per_op_min":53.35,"samples":[53.71,53.35,53.85]}
|
||||||
|
```
|
||||||
|
|
||||||
|
Suitable for piping into a comparison tool, persisting in CI, or
|
||||||
|
plotting.
|
||||||
|
|
||||||
|
## A note on the optimizer
|
||||||
|
|
||||||
|
The benchmark executable is built in `ReleaseFast` by default. In that
|
||||||
|
mode the compiler will delete trivial work whose result is never used.
|
||||||
|
If you see `0.00 ns/op` for a hot loop, that's the signal — wrap the
|
||||||
|
result inside the loop with `b.keep(value)`:
|
||||||
|
|
||||||
|
```zig
|
||||||
|
var sum: u64 = 0;
|
||||||
|
var i: u64 = 0;
|
||||||
|
while (i < b.n) : (i += 1) {
|
||||||
|
sum +%= i *% 31;
|
||||||
|
b.keep(sum); // forces the optimizer to keep the work
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Idioms
|
||||||
|
|
||||||
|
- Pass `init.gpa` and `init.io` from `main` straight into the suite —
|
||||||
|
zbench does not reach for global state.
|
||||||
|
- The allocator exposed by `b.allocator` wraps your `gpa` for
|
||||||
|
accounting; use it as you would any allocator.
|
||||||
|
- `Benchmark` methods use snake_case to match the project's house
|
||||||
|
style (`reset_timer`, `set_bytes`, …). Calls into `std` keep their
|
||||||
|
upstream casing.
|
||||||
|
|
||||||
|
## Running the bundled example
|
||||||
|
|
||||||
|
From the repo root:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
zig build example -- --min-time=100ms
|
||||||
|
zig build example -- --filter=hash_sizes --count=3 --format=json
|
||||||
|
```
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
MIT. See `LICENSE` if present; until a license file is added, treat MIT
|
||||||
|
as this project's default license terms.
|
||||||
Reference in New Issue
Block a user