diff --git a/README.md b/README.md new file mode 100644 index 0000000..a2f5c6a --- /dev/null +++ b/README.md @@ -0,0 +1,299 @@ +# zbench + +Go-style benchmarking for Zig 0.16+. No external dependencies. Adaptive +iteration count, per-op allocation tracking, throughput (MB/s), +sub-benchmarks, statistical repetition, and text or JSON output. + +``` +benchmark iters ns/op B/op allocs/op MB/s +append_u8 300000000 0.52 1 0 +sha256_64 3000000 53.62 1138.37 +hash_sizes/sha256_16 5000000 32.55 468.81 +hash_sizes/sha256_256 1000000 134.26 1818.43 +hash_sizes/sha256_4096 100000 1778.86 2195.92 +``` + +## Requirements + +- Zig 0.16.0 or newer. + +## Adding the dependency + +In your project's `build.zig.zon`: + +```zig +.dependencies = .{ + .zbench = .{ + .url = "https://example.com/zbench-0.1.0.tar.gz", + .hash = "...", + }, +}, +``` + +Or fetch from a local path during development: + +```sh +zig fetch --save=zbench ../path/to/zbench +``` + +## Wiring it up in `build.zig` + +The library exposes two modules: `zbench` (the runtime API) and +`zbench_build` (a small build helper). A typical consumer `build.zig`: + +```zig +const std = @import("std"); +const zbench_build = @import("zbench_build"); + +pub fn build(b: *std.Build) void { + const target = b.standardTargetOptions(.{}); + + const zbench = b.dependency("zbench", .{ + .target = target, + .optimize = .ReleaseFast, + }); + + _ = zbench_build.add_bench_step(b, .{ + .step_name = "bench", + .root = b.path("bench/main.zig"), + .target = target, + .zbench = zbench.module("zbench"), + }); +} +``` + +Now `zig build bench` compiles and runs the benchmark executable, and +`zig build bench -- --min-time=200ms --count=3` forwards CLI arguments +through to it. + +If you prefer not to use the helper, you can build the executable +yourself and wire it up like any other `b.addExecutable` step — see +`examples/bench/build.zig` for the explicit version. 
+ +## Minimal benchmark + +```zig +const std = @import("std"); +const zbench = @import("zbench"); + +pub fn main(init: std.process.Init) !void { + var suite = zbench.Suite.init(init.gpa, init.io); + defer suite.deinit(); + + try suite.add("append", bench_append); + try suite.add("hash", bench_hash); + + try suite.run_cli(init); +} + +fn bench_append(b: *zbench.Benchmark) !void { + var list: std.ArrayListUnmanaged(u8) = .empty; + defer list.deinit(b.allocator); + + b.reset_timer(); + var i: u64 = 0; + while (i < b.n) : (i += 1) { + try list.append(b.allocator, @intCast(i & 0xff)); + } + b.keep(list.items); +} + +fn bench_hash(b: *zbench.Benchmark) !void { + var buf: [64]u8 = @splat(0xab); + var out: [32]u8 = undefined; + + b.reset_timer(); + var i: u64 = 0; + while (i < b.n) : (i += 1) { + std.crypto.hash.sha2.Sha256.hash(&buf, &out, .{}); + b.keep(out); + } + b.set_bytes(buf.len); +} +``` + +The mental model matches Go's `testing.B`: the runner calls your +function with an increasing `b.n` until the wall time crosses +`--min-time`. You write the loop, the framework picks how many times +to run it. + +## The `Benchmark` API + +- `b.n` — target iteration count for the current attempt. +- `b.allocator` — a wrapping allocator that counts allocations; use it + if you want `B/op` and `allocs/op` reported. +- `b.io` — `std.Io` for benchmarks that need to perform I/O. +- `b.reset_timer()` — call after setup, before the measured loop. +- `b.stop_timer()` / `b.start_timer()` — exclude per-iteration setup + from the measurement. +- `b.set_bytes(bytes_per_op)` — declare throughput; the reporter shows + `MB/s`. +- `b.report_allocs()` — force the `B/op` / `allocs/op` columns for + this benchmark, regardless of `--allocs`. +- `b.keep(value)` — optimization barrier; keeps a computed value alive + past the loop so ReleaseFast does not delete the work. +- `b.run(name, fn)` — run a sub-benchmark. 
The parent function becomes + a container and is not itself reported; the sub-benchmark is + reported as `parent/name`. + +## Excluding setup from the measurement + +```zig +fn bench_lookup(b: *zbench.Benchmark) !void { + var map: std.AutoHashMapUnmanaged(u64, u64) = .empty; + defer map.deinit(b.allocator); + for (0..1000) |k| try map.put(b.allocator, k, k *% 31); + + b.reset_timer(); + var hits: u64 = 0; + var i: u64 = 0; + while (i < b.n) : (i += 1) { + if (map.get(i % 1000)) |v| hits +%= v; + } + b.keep(hits); +} +``` + +Per-iteration setup that should not be measured (note that the deferred `free` runs at the end of each iteration, after `b.start_timer()`, so it is still timed): + +```zig +while (i < b.n) : (i += 1) { + b.stop_timer(); + const input = try generate_input(b.allocator); + defer b.allocator.free(input); + b.start_timer(); + + _ = process(input); +} +``` + +## Sub-benchmarks + +A parent function delegates to one or more sub-benchmarks via +`b.run`. Each sub-benchmark is run as a fresh adaptive attempt and reported +on its own line: + +```zig +fn bench_hash_sizes(b: *zbench.Benchmark) !void { + try b.run("sha256_16", gen_sha256(16)); + try b.run("sha256_256", gen_sha256(256)); + try b.run("sha256_4096", gen_sha256(4096)); +} + +fn gen_sha256(comptime size: usize) zbench.BenchFn { + return struct { + fn run(b: *zbench.Benchmark) !void { + var buf: [size]u8 = @splat(0xcd); + var out: [32]u8 = undefined; + b.reset_timer(); + var i: u64 = 0; + while (i < b.n) : (i += 1) { + std.crypto.hash.sha2.Sha256.hash(&buf, &out, .{}); + b.keep(out); + } + b.set_bytes(size); + } + }.run; +} +``` + +Output: + +``` +hash_sizes/sha256_16 5000000 32.55 468.81 +hash_sizes/sha256_256 1000000 134.26 1818.43 +hash_sizes/sha256_4096 100000 1778.86 2195.92 +``` + +## Comptime-parametric benchmarks + +Zig has no closures, so parameterize at compile time and register one +benchmark per value: + +```zig +inline for (.{ 16, 256, 4096 }) |size| { + try suite.add( + std.fmt.comptimePrint("memset_{d}", .{size}), + gen_bench_memset(size), + ); +} + +fn gen_bench_memset(comptime size: usize) 
zbench.BenchFn { + return struct { + fn run(b: *zbench.Benchmark) !void { + var buf: [size]u8 = undefined; + b.reset_timer(); + var i: u64 = 0; + while (i < b.n) : (i += 1) { + @memset(&buf, @intCast(i & 0xff)); + b.keep(buf); + } + b.set_bytes(size); + } + }.run; +} +``` + +## CLI flags + +| Flag | Default | Meaning | +|---|---|---| +| `--filter=<substr>` | _none_ | Run only benchmarks whose name contains the substring. Use `parent/leaf` to target a specific sub-bench. | +| `--min-time=<duration>` | `1s` | Minimum wall time per benchmark. Accepts `s`, `ms`, `us`, `ns`. | +| `--count=<n>` | `1` | Repeat each benchmark `n` times; output includes `mean ± stddev` (text) or `samples` (JSON). | +| `--max-iters=<n>` | `1_000_000_000` | Hard cap on iterations per attempt. | +| `--allocs` | off | Always print `B/op` and `allocs/op` columns. | +| `--format=text\|json` | `text` | Output format. JSON is ndjson — one object per line. | +| `--list` | — | Print names of all registered benchmarks and exit. | +| `--help`, `-h` | — | Print help. | + +## JSON output + +`--format=json` emits one ndjson record per benchmark group: + +```json +{"name":"sha256_64","n":1000000,"ns_per_op":53.63,"bytes_per_op":0,"allocs_per_op":0,"mb_per_sec":1133.49,"count":3,"ns_per_op_mean":53.63,"ns_per_op_stddev":0.26,"ns_per_op_min":53.35,"samples":[53.71,53.35,53.85]} +``` + +Suitable for piping into a comparison tool, persisting in CI, or +plotting. + +## A note on the optimizer + +`zig build` defaults the benchmark executable to `ReleaseFast`. In that +mode the compiler will delete trivial work whose result is never used. +If you see `0.00 ns/op` for a hot loop, that's the signal — wrap the +result inside the loop with `b.keep(value)`: + +```zig +var sum: u64 = 0; +var i: u64 = 0; +while (i < b.n) : (i += 1) { + sum +%= i *% 31; + b.keep(sum); // forces the optimizer to keep the work +} +``` + +## Idioms + +- Pass `init.gpa` and `init.io` from `main` straight into the suite — + zbench does not reach for global state. 
+- The allocator exposed by `b.allocator` wraps your `gpa` for + accounting; use it as you would any allocator. +- `Benchmark` methods use snake_case to match the project's house + style (`reset_timer`, `set_bytes`, …). Calls into `std` keep their + upstream casing. + +## Running the bundled example + +From the repo root: + +```sh +zig build example -- --min-time=100ms +zig build example -- --filter=hash_sizes --count=3 --format=json +``` + +## License + +MIT — see `LICENSE` if present, or treat this as your project's +default license terms until one is added.