Examples covering the main usage patterns:
- bench_append_u8: per-iteration heap work, demonstrates B/op tracking
via the wrapped allocator.
- bench_sha256_64: set_bytes() for throughput in MB/s.
- bench_integer_sum: shows the optimization pitfall with a comment —
trivial loop bodies need an in-loop b.keep to survive ReleaseFast.
- bench_hash_sizes: parent that delegates to sub-benchmarks via b.run,
printed as `hash_sizes/sha256_<size>`.
- memset_{16,256,4096}: comptime-parametric benchmark generation via
std.fmt.comptimePrint + a generic gen_bench factory.
Includes a stand-alone examples/bench/build.zig illustrating the
zbench_build.add_bench_step integration a downstream project would use.
101 lines
2.9 KiB
Zig
101 lines
2.9 KiB
Zig
const std = @import("std");
|
|
const zbench = @import("zbench");
|
|
|
|
/// Entry point: builds a benchmark suite, registers every example benchmark
/// in this file, then hands control to the suite's command-line runner.
pub fn main(init: std.process.Init) !void {
    var benchmarks = zbench.Suite.init(init.gpa, init.io);
    defer benchmarks.deinit();

    // Hand-written benchmarks, each registered under an explicit name.
    try benchmarks.add("append_u8", bench_append_u8);
    try benchmarks.add("sha256_64", bench_sha256_64);
    try benchmarks.add("integer_sum", bench_integer_sum);
    try benchmarks.add("hash_sizes", bench_hash_sizes);

    // Comptime-parametric pattern: synthesize one `memset_<size>` benchmark
    // per buffer size. The loop is unrolled at compile time, so both the
    // name and the generated function are comptime-known.
    const memset_sizes = .{ 16, 256, 4096 };
    inline for (memset_sizes) |size| {
        const name = std.fmt.comptimePrint("memset_{d}", .{size});
        try benchmarks.add(name, gen_bench_memset(size));
    }

    try benchmarks.run_cli(init);
}
|
|
|
|
/// Appends `b.n` bytes, one per iteration, to a list that allocates through
/// `b.allocator`, so the heap work done while growing the list goes through
/// the benchmark's allocator.
fn bench_append_u8(b: *zbench.Benchmark) !void {
    var bytes: std.ArrayListUnmanaged(u8) = .empty;
    defer bytes.deinit(b.allocator);

    // NOTE(review): presumably restarts the measurement so the setup above
    // is not counted — confirm against zbench.
    b.reset_timer();

    var iter: u64 = 0;
    while (iter < b.n) {
        // Masking to the low 8 bits guarantees the value fits in a u8.
        const low_byte: u8 = @intCast(iter & 0xff);
        try bytes.append(b.allocator, low_byte);
        iter += 1;
    }

    // Hand the final contents to the benchmark so the appends are observed.
    b.keep(bytes.items);
}
|
|
|
|
/// Hashes a fixed 64-byte buffer with SHA-256 once per iteration and reports
/// the per-iteration byte count through `set_bytes`.
fn bench_sha256_64(b: *zbench.Benchmark) !void {
    const Sha256 = std.crypto.hash.sha2.Sha256;

    var input: [64]u8 = @splat(0xab);

    b.reset_timer();

    var digest: [Sha256.digest_length]u8 = undefined;
    var iter: u64 = 0;
    while (iter < b.n) {
        Sha256.hash(&input, &digest, .{});
        // Observe the digest every iteration so the hash is not discarded.
        b.keep(digest);
        iter += 1;
    }

    b.set_bytes(input.len);
}
|
|
|
|
// Illustrates the dead-code pitfall: in ReleaseFast the compiler will erase
// trivial work unless its result is observed *inside* the loop. Calling
// `b.keep` on the running total at the end of every iteration prevents the
// optimizer from proving the value dead, so the computation has to stay.
// Try removing the inner `b.keep` to see ns/op collapse to ~0.
fn bench_integer_sum(b: *zbench.Benchmark) !void {
    b.reset_timer();

    var total: u64 = 0;
    var iter: u64 = 0;
    while (iter < b.n) {
        // Wrapping operators: overflow is defined and cannot trap.
        const term = iter *% 31;
        total = total +% term;
        b.keep(total);
        iter += 1;
    }
}
|
|
|
|
/// Sub-benchmark example: a single parent that delegates to one child per
/// input size, named `sha256_16`, `sha256_256` and `sha256_4096`, run in
/// that order.
fn bench_hash_sizes(b: *zbench.Benchmark) !void {
    // Unrolled at compile time; each child's name and function are
    // comptime-known.
    inline for (.{ 16, 256, 4096 }) |size| {
        const child_name = std.fmt.comptimePrint("sha256_{d}", .{size});
        try b.run(child_name, gen_bench_sha256(size));
    }
}
|
|
|
|
/// Builds a benchmark function that SHA-256 hashes a `size`-byte buffer once
/// per iteration and reports `size` through `set_bytes`.
fn gen_bench_sha256(comptime size: usize) zbench.BenchFn {
    const Impl = struct {
        const Sha256 = std.crypto.hash.sha2.Sha256;

        fn run(b: *zbench.Benchmark) !void {
            var input: [size]u8 = @splat(0xcd);
            var digest: [Sha256.digest_length]u8 = undefined;

            b.reset_timer();

            var iter: u64 = 0;
            while (iter < b.n) {
                Sha256.hash(&input, &digest, .{});
                // Observe the digest every iteration so the hash is kept.
                b.keep(digest);
                iter += 1;
            }

            b.set_bytes(size);
        }
    };
    return Impl.run;
}
|
|
|
|
/// Builds a benchmark function that fills a `size`-byte stack buffer once per
/// iteration and reports `size` through `set_bytes`.
fn gen_bench_memset(comptime size: usize) zbench.BenchFn {
    const Impl = struct {
        fn run(b: *zbench.Benchmark) !void {
            // Left uninitialized on purpose: every iteration overwrites the
            // whole buffer before it is observed.
            var scratch: [size]u8 = undefined;

            b.reset_timer();

            var iter: u64 = 0;
            while (iter < b.n) {
                // The fill value changes each iteration; masking to the low
                // 8 bits guarantees it fits in a u8.
                const fill: u8 = @intCast(iter & 0xff);
                @memset(&scratch, fill);
                b.keep(scratch);
                iter += 1;
            }

            b.set_bytes(size);
        }
    };
    return Impl.run;
}
|