refactor: simplify after code review

Aggregated fixes from a three-axis review (reuse, quality, efficiency):

- benchmark.zig: store raw bytes_per_op on Benchmark instead of pre-
  multiplying by b.n inside set_bytes. Reset no longer clobbers it, so
  the call order no longer matters and the math moves to result
  finalization where n is final.
- runner.zig: drop the runner-initiated b.reset_timer (was duplicative
  with the user's own reset_timer at the top of every bench). Pack
  counter/sub_run/sub_run_ctx/io into a runner.Env so run_one's
  signature stays at four params instead of seven.
- reporter.zig: remove the [256]f64 stack buffer that silently
  truncated --count=500. Reporters now take a pre-computed Summary and
  raw ns_samples slice from the suite. Replace the hand-rolled JSON
  string escape with std.json.Stringify.encodeJsonString.
- suite.zig: replace four nullable active_* fields with an explicit
  RunCtx threaded through run_named and the sub_run trampoline.
  Allocate ns_samples once per benchmark group. Extract match_flag for
  the eight identical --foo= prefix checks and open_stderr for the
  duplicated stderr-writer dance.

`--count=500` now produces 500 JSON samples (was 256 silently). No
behavior change for users at the API level beyond the set_bytes
ordering footgun being removed.
This commit is contained in:
2026-05-21 08:33:24 +03:00
parent 3f52ff1eeb
commit cdbdb9b351
4 changed files with 198 additions and 202 deletions

View File

@@ -34,7 +34,9 @@ pub const Benchmark = struct {
accumulated_ns: i128 = 0,
timer_running: bool = true,
start_ts: Io.Timestamp = Io.Timestamp.zero,
bytes_processed: u64 = 0,
/// Bytes processed per iteration, as declared by `set_bytes`. The
/// reporter multiplies by `n` to derive total throughput in MB/s.
bytes_per_op: u64 = 0,
/// If the user calls `b.run`, this Benchmark is just a container — the
/// outer runner will skip reporting its own result.
is_container: bool = false,
@@ -43,10 +45,10 @@ pub const Benchmark = struct {
/// Start (or restart) the timer fresh: zero accumulated time, zero
/// allocation counters. Call after setup and before the measured loop.
/// `set_bytes` survives `reset_timer` — declare it at any point.
pub fn reset_timer(b: *Benchmark) void {
b.accumulated_ns = 0;
b.counter.reset();
b.bytes_processed = 0;
b.timer_running = true;
b.start_ts = Io.Timestamp.now(b.io, .awake);
}
@@ -67,10 +69,11 @@ pub const Benchmark = struct {
b.timer_running = true;
}
/// Record bytes processed by this iteration; the reporter divides by
/// `ns/op` to print MB/s.
/// Record bytes processed per iteration; the reporter multiplies by
/// `n` and divides by elapsed time to print MB/s. Safe to call before
/// or after `reset_timer`.
pub fn set_bytes(b: *Benchmark, bytes_per_op: u64) void {
b.bytes_processed = bytes_per_op *| b.n;
b.bytes_per_op = bytes_per_op;
}
/// Mark this benchmark so allocations columns are always printed,

View File

@@ -18,31 +18,25 @@ pub fn write_text_header(w: *Writer) !void {
});
}
/// Print one benchmark result group (>= 1 attempt) as a text row.
/// Render one benchmark group as a text row. `summary` summarizes the
/// `ns_per_op` distribution across all attempts in the group; `last`
/// supplies metadata (n, B/op, MB/s, …) that is stable across attempts.
pub fn write_text(
w: *Writer,
name: []const u8,
runs: []const Result,
last: Result,
summary: stats.Summary,
opts: ReportOpts,
) !void {
if (runs.len == 0) return;
if (runs[0].is_container) return;
var buf: [256]f64 = undefined;
const ns_samples: []f64 = buf[0..@min(runs.len, buf.len)];
for (ns_samples, runs[0..ns_samples.len]) |*s, r| s.* = r.ns_per_op;
const ns_summary = stats.summarize(ns_samples);
const last = runs[runs.len - 1];
const show_allocs = opts.always_allocs or last.force_report_allocs or
last.bytes_per_op > 0 or last.allocs_per_op > 0;
try w.print("{s:<30} {d:>10} ", .{ name, last.n });
if (runs.len > 1) {
try w.print("{d:>7.2} \xc2\xb1 {d:<2.2} ", .{ ns_summary.mean, ns_summary.stddev });
if (summary.n > 1) {
try w.print("{d:>7.2} \xc2\xb1 {d:<2.2} ", .{ summary.mean, summary.stddev });
} else {
try w.print("{d:>12.2} ", .{ ns_summary.mean });
try w.print("{d:>12.2} ", .{summary.mean});
}
if (show_allocs) {
@@ -63,22 +57,14 @@ pub fn write_text(
pub fn write_json(
w: *Writer,
name: []const u8,
runs: []const Result,
last: Result,
ns_samples: []const f64,
summary: stats.Summary,
) !void {
if (runs.len == 0) return;
if (runs[0].is_container) return;
var buf: [256]f64 = undefined;
const ns_samples: []f64 = buf[0..@min(runs.len, buf.len)];
for (ns_samples, runs[0..ns_samples.len]) |*s, r| s.* = r.ns_per_op;
const s = stats.summarize(ns_samples);
const last = runs[runs.len - 1];
try w.writeAll("{");
try write_json_string_field(w, "name", name);
try w.writeAll("{\"name\":");
try std.json.Stringify.encodeJsonString(name, .{}, w);
try w.print(",\"n\":{d}", .{last.n});
try w.print(",\"ns_per_op\":{d}", .{s.mean});
try w.print(",\"ns_per_op\":{d}", .{summary.mean});
try w.print(",\"bytes_per_op\":{d}", .{last.bytes_per_op});
try w.print(",\"allocs_per_op\":{d}", .{last.allocs_per_op});
@@ -88,12 +74,12 @@ pub fn write_json(
try w.writeAll(",\"mb_per_sec\":null");
}
try w.print(",\"count\":{d}", .{runs.len});
try w.print(",\"count\":{d}", .{summary.n});
if (runs.len > 1) {
if (summary.n > 1) {
try w.print(
",\"ns_per_op_mean\":{d},\"ns_per_op_stddev\":{d},\"ns_per_op_min\":{d}",
.{ s.mean, s.stddev, s.min },
.{ summary.mean, summary.stddev, summary.min },
);
try w.writeAll(",\"samples\":[");
for (ns_samples, 0..) |x, i| {
@@ -105,19 +91,3 @@ pub fn write_json(
try w.writeAll("}\n");
}
fn write_json_string_field(w: *Writer, key: []const u8, value: []const u8) !void {
try w.print("\"{s}\":\"", .{key});
for (value) |c| {
switch (c) {
'"' => try w.writeAll("\\\""),
'\\' => try w.writeAll("\\\\"),
'\n' => try w.writeAll("\\n"),
'\r' => try w.writeAll("\\r"),
'\t' => try w.writeAll("\\t"),
0x00...0x08, 0x0b, 0x0c, 0x0e...0x1f => try w.print("\\u{x:0>4}", .{c}),
else => try w.print("{c}", .{c}),
}
}
try w.writeAll("\"");
}

View File

@@ -28,80 +28,78 @@ pub const Options = struct {
max_iters: u64 = 1_000_000_000,
};
/// Adaptive single-run: grow `n` until `elapsed >= min_time_ns` or
/// `n >= max_iters`. Returns the final `Result`.
pub fn run_one(
name: []const u8,
f: BenchFn,
/// Per-suite collaborators threaded into every `run_one` call.
pub const Env = struct {
counter: *CountingAllocator,
sub_run: SubRunFn,
sub_run_ctx: *anyopaque,
io: std.Io,
opts: Options,
) !Result {
};
/// Adaptive single-run: grow `n` until `elapsed >= min_time_ns` or
/// `n >= max_iters`. Returns the final `Result`.
pub fn run_one(name: []const u8, f: BenchFn, env: Env, opts: Options) !Result {
var n: u64 = 1;
var last_elapsed_ns: u64 = 0;
var last_alloc_bytes: u64 = 0;
var last_alloc_count: u64 = 0;
var last_bytes_processed: u64 = 0;
var last_is_container: bool = false;
var last_force_report: bool = false;
var elapsed_ns: u64 = 0;
var alloc_bytes: u64 = 0;
var alloc_count: u64 = 0;
var bytes_per_op: u64 = 0;
var is_container = false;
var force_report = false;
while (true) {
env.counter.reset();
var b: Benchmark = .{
.n = n,
.allocator = counter.allocator(),
.io = io,
.allocator = env.counter.allocator(),
.io = env.io,
.name = name,
.counter = counter,
.sub_run = sub_run,
.sub_run_ctx = sub_run_ctx,
.counter = env.counter,
.sub_run = env.sub_run,
.sub_run_ctx = env.sub_run_ctx,
.start_ts = std.Io.Timestamp.now(env.io, .awake),
};
counter.reset();
b.reset_timer();
try f(&b);
b.finish();
const elapsed_i: i128 = if (b.accumulated_ns < 0) 0 else b.accumulated_ns;
last_elapsed_ns = @intCast(@min(elapsed_i, std.math.maxInt(u64)));
last_alloc_bytes = counter.bytes_allocated;
last_alloc_count = counter.allocs;
last_bytes_processed = b.bytes_processed;
last_is_container = b.is_container;
last_force_report = b.force_report_allocs;
elapsed_ns = if (b.accumulated_ns <= 0) 0 else @intCast(@min(b.accumulated_ns, std.math.maxInt(u64)));
alloc_bytes = env.counter.bytes_allocated;
alloc_count = env.counter.allocs;
bytes_per_op = b.bytes_per_op;
is_container = b.is_container;
force_report = b.force_report_allocs;
if (last_is_container) break;
if (last_elapsed_ns >= opts.min_time_ns) break;
if (is_container) break;
if (elapsed_ns >= opts.min_time_ns) break;
if (n >= opts.max_iters) break;
n = next_n(n, last_elapsed_ns, opts.min_time_ns, opts.max_iters);
n = next_n(n, elapsed_ns, opts.min_time_ns, opts.max_iters);
}
const fn_n: f64 = @floatFromInt(n);
const ns_per_op: f64 = if (n == 0) 0 else @as(f64, @floatFromInt(last_elapsed_ns)) / fn_n;
const bytes_per_op: f64 = if (n == 0) 0 else @as(f64, @floatFromInt(last_alloc_bytes)) / fn_n;
const allocs_per_op: f64 = if (n == 0) 0 else @as(f64, @floatFromInt(last_alloc_count)) / fn_n;
const ns_per_op_v: f64 = if (n == 0) 0 else @as(f64, @floatFromInt(elapsed_ns)) / fn_n;
const bytes_alloc_per_op: f64 = if (n == 0) 0 else @as(f64, @floatFromInt(alloc_bytes)) / fn_n;
const allocs_per_op: f64 = if (n == 0) 0 else @as(f64, @floatFromInt(alloc_count)) / fn_n;
const mb_per_sec: ?f64 = if (last_bytes_processed == 0 or last_elapsed_ns == 0)
const mb_per_sec: ?f64 = if (bytes_per_op == 0 or elapsed_ns == 0)
null
else blk: {
const bytes_f: f64 = @floatFromInt(last_bytes_processed);
const elapsed_s: f64 = @as(f64, @floatFromInt(last_elapsed_ns)) / @as(f64, std.time.ns_per_s);
break :blk (bytes_f / (1024.0 * 1024.0)) / elapsed_s;
const total_bytes: f64 = @floatFromInt(bytes_per_op *| n);
const elapsed_s: f64 = @as(f64, @floatFromInt(elapsed_ns)) / @as(f64, std.time.ns_per_s);
break :blk (total_bytes / (1024.0 * 1024.0)) / elapsed_s;
};
return .{
.name = name,
.n = n,
.elapsed_ns = last_elapsed_ns,
.ns_per_op = ns_per_op,
.bytes_per_op = bytes_per_op,
.elapsed_ns = elapsed_ns,
.ns_per_op = ns_per_op_v,
.bytes_per_op = bytes_alloc_per_op,
.allocs_per_op = allocs_per_op,
.mb_per_sec = mb_per_sec,
.force_report_allocs = last_force_report,
.is_container = last_is_container,
.force_report_allocs = force_report,
.is_container = is_container,
};
}
@@ -154,10 +152,8 @@ test "round_up snaps to nice numbers" {
}
test "next_n grows toward target" {
// first run: 0 ns -> jump by 100x
try std.testing.expectEqual(@as(u64, 100), next_n(1, 0, std.time.ns_per_s, 1 << 30));
// 100 iters in 1 ms; target 1s -> predicted = 1.2e6 * 100 / 1e6 = 120000, rounded 200000
const n2 = next_n(100, std.time.ns_per_ms, std.time.ns_per_s, 1 << 30);
try std.testing.expect(n2 > 100);
try std.testing.expect(n2 <= 100 * 100);

View File

@@ -6,6 +6,7 @@ const Writer = std.Io.Writer;
const bench = @import("benchmark.zig");
const runner = @import("runner.zig");
const reporter = @import("reporter.zig");
const stats = @import("stats.zig");
const CountingAllocator = @import("alloc.zig").CountingAllocator;
const Benchmark = bench.Benchmark;
@@ -33,14 +34,6 @@ pub const Suite = struct {
io: Io,
entries: std.ArrayListUnmanaged(Entry) = .empty,
/// Mutable state during a run — installed by `run`.
active_writer: ?*Writer = null,
active_opts: Options = .{},
active_counter: ?*CountingAllocator = null,
/// The composed name of the benchmark currently being executed.
/// Used by `sub_run_trampoline` to prefix sub-benchmark names.
current_name: ?[]const u8 = null,
pub fn init(gpa: Allocator, io: Io) Suite {
return .{ .gpa = gpa, .io = io };
}
@@ -60,121 +53,60 @@ pub const Suite = struct {
/// Run all registered benchmarks with `opts`, writing to `writer`.
pub fn run(self: *Suite, opts: Options, writer: *Writer) !void {
var counter = CountingAllocator.init(self.gpa);
self.active_writer = writer;
self.active_opts = opts;
self.active_counter = &counter;
defer {
self.active_writer = null;
self.active_counter = null;
self.current_name = null;
}
var ctx: RunCtx = .{
.gpa = self.gpa,
.io = self.io,
.writer = writer,
.opts = opts,
.counter = &counter,
};
if (opts.format == .text) try reporter.write_text_header(writer);
for (self.entries.items) |entry| {
if (opts.filter) |needle| {
// Substring match on the top-level name, or on the parent
// segment when the user passes a `parent/leaf` style filter.
const target = if (std.mem.indexOf(u8, needle, "/")) |i|
// For `parent/leaf` style filters, gate the top level on the
// parent segment so we still descend into matching subs.
const target = if (std.mem.indexOfScalar(u8, needle, '/')) |i|
needle[0..i]
else
needle;
if (std.mem.indexOf(u8, entry.name, target) == null) continue;
}
try self.run_named(entry.name, entry.f);
try run_named(&ctx, entry.name, entry.f);
}
try writer.flush();
}
/// Run a benchmark by composed name. Used both for top-level entries
/// and sub-benchmarks (`parent/sub`).
fn run_named(self: *Suite, full_name: []const u8, f: BenchFn) !void {
const opts = self.active_opts;
const writer = self.active_writer.?;
const counter = self.active_counter.?;
const saved_current = self.current_name;
self.current_name = full_name;
defer self.current_name = saved_current;
const count = @max(@as(u32, 1), opts.count);
const samples = try self.gpa.alloc(Result, count);
defer self.gpa.free(samples);
var i: u32 = 0;
while (i < count) : (i += 1) {
samples[i] = try runner.run_one(
full_name,
f,
counter,
Suite.sub_run_trampoline,
self,
self.io,
.{ .min_time_ns = opts.min_time_ns, .max_iters = opts.max_iters },
);
// If the user delegated to sub-benchmarks, no point repeating.
if (samples[i].is_container) {
i += 1;
break;
}
}
const used = samples[0..i];
switch (opts.format) {
.text => try reporter.write_text(
writer,
full_name,
used,
.{ .always_allocs = opts.always_allocs },
),
.json => try reporter.write_json(writer, full_name, used),
}
}
fn sub_run_trampoline(ctx: *anyopaque, sub_name: []const u8, f: BenchFn) anyerror!void {
const self: *Suite = @ptrCast(@alignCast(ctx));
const parent = self.current_name orelse "<orphan>";
const composed = try std.fmt.allocPrint(self.gpa, "{s}/{s}", .{ parent, sub_name });
defer self.gpa.free(composed);
// Apply filter against the composed name so users can target a
// specific sub-benchmark with --filter=parent/leaf or by substring.
if (self.active_opts.filter) |needle| {
if (std.mem.indexOf(u8, composed, needle) == null) return;
}
try self.run_named(composed, f);
}
/// CLI entrypoint: parse `proc_init.minimal.args`, then dispatch to `run`.
pub fn run_cli(self: *Suite, proc_init: std.process.Init) !void {
var args_it = std.process.Args.Iterator.init(proc_init.minimal.args);
_ = args_it.skip(); // argv[0]
var opts: Options = .{};
var list_only = false;
var args_it = std.process.Args.Iterator.init(proc_init.minimal.args);
_ = args_it.skip(); // argv[0]
while (args_it.next()) |raw| {
const arg: []const u8 = raw;
if (std.mem.eql(u8, arg, "--help") or std.mem.eql(u8, arg, "-h")) {
try print_help(proc_init);
var buf: [2048]u8 = undefined;
var w = open_stderr(proc_init.io, &buf);
try print_help(&w.interface);
return;
} else if (std.mem.eql(u8, arg, "--list")) {
list_only = true;
} else if (std.mem.eql(u8, arg, "--allocs")) {
opts.always_allocs = true;
} else if (std.mem.startsWith(u8, arg, "--filter=")) {
opts.filter = arg["--filter=".len..];
} else if (std.mem.startsWith(u8, arg, "--min-time=")) {
opts.min_time_ns = try parse_duration_ns(arg["--min-time=".len..]);
} else if (std.mem.startsWith(u8, arg, "--count=")) {
opts.count = try std.fmt.parseInt(u32, arg["--count=".len..], 10);
} else if (std.mem.startsWith(u8, arg, "--max-iters=")) {
opts.max_iters = try std.fmt.parseInt(u64, arg["--max-iters=".len..], 10);
} else if (std.mem.startsWith(u8, arg, "--format=")) {
const v = arg["--format=".len..];
} else if (match_flag(arg, "--filter")) |v| {
opts.filter = v;
} else if (match_flag(arg, "--min-time")) |v| {
opts.min_time_ns = try parse_duration_ns(v);
} else if (match_flag(arg, "--count")) |v| {
opts.count = try std.fmt.parseInt(u32, v, 10);
} else if (match_flag(arg, "--max-iters")) |v| {
opts.max_iters = try std.fmt.parseInt(u64, v, 10);
} else if (match_flag(arg, "--format")) |v| {
if (std.mem.eql(u8, v, "text")) {
opts.format = .text;
} else if (std.mem.eql(u8, v, "json")) {
@@ -186,24 +118,112 @@ pub const Suite = struct {
}
var buf: [4096]u8 = undefined;
var stderr_file = std.Io.File.stderr().writerStreaming(proc_init.io, &buf);
var w = open_stderr(proc_init.io, &buf);
if (list_only) {
for (self.entries.items) |e| {
try stderr_file.interface.print("{s}\n", .{e.name});
}
try stderr_file.interface.flush();
for (self.entries.items) |e| try w.interface.print("{s}\n", .{e.name});
try w.interface.flush();
return;
}
try self.run(opts, &stderr_file.interface);
try self.run(opts, &w.interface);
}
};
fn print_help(proc_init: std.process.Init) !void {
var buf: [2048]u8 = undefined;
var stderr_file = std.Io.File.stderr().writerStreaming(proc_init.io, &buf);
const w = &stderr_file.interface;
/// Per-run state threaded through `run_named` and the sub-benchmark
/// trampoline. Avoids stashing mutable globals on `Suite`.
const RunCtx = struct {
gpa: Allocator,
io: Io,
writer: *Writer,
opts: Options,
counter: *CountingAllocator,
/// Composed name of the benchmark currently executing — used as the
/// prefix when its `f` invokes `b.run(...)`.
parent_name: ?[]const u8 = null,
};
/// Run a benchmark by composed name. Used both for top-level entries and
/// sub-benchmarks (parent/sub).
fn run_named(ctx: *RunCtx, full_name: []const u8, f: BenchFn) !void {
const saved = ctx.parent_name;
ctx.parent_name = full_name;
defer ctx.parent_name = saved;
const count = @max(@as(u32, 1), ctx.opts.count);
const samples = try ctx.gpa.alloc(Result, count);
defer ctx.gpa.free(samples);
const env: runner.Env = .{
.counter = ctx.counter,
.sub_run = sub_run_trampoline,
.sub_run_ctx = ctx,
.io = ctx.io,
};
const r_opts: runner.Options = .{
.min_time_ns = ctx.opts.min_time_ns,
.max_iters = ctx.opts.max_iters,
};
var i: u32 = 0;
while (i < count) : (i += 1) {
samples[i] = try runner.run_one(full_name, f, env, r_opts);
// Containers delegate to sub-benchmarks; their own row is meaningless
// and there's nothing to repeat across `--count`.
if (samples[i].is_container) {
i += 1;
break;
}
}
const used = samples[0..i];
if (used.len == 0 or used[used.len - 1].is_container) return;
const ns_samples = try ctx.gpa.alloc(f64, used.len);
defer ctx.gpa.free(ns_samples);
for (ns_samples, used) |*s, r| s.* = r.ns_per_op;
const summary = stats.summarize(ns_samples);
const last = used[used.len - 1];
switch (ctx.opts.format) {
.text => try reporter.write_text(
ctx.writer,
full_name,
last,
summary,
.{ .always_allocs = ctx.opts.always_allocs },
),
.json => try reporter.write_json(ctx.writer, full_name, last, ns_samples, summary),
}
}
fn sub_run_trampoline(opaque_ctx: *anyopaque, sub_name: []const u8, f: BenchFn) anyerror!void {
const ctx: *RunCtx = @ptrCast(@alignCast(opaque_ctx));
const parent = ctx.parent_name orelse "<orphan>";
const composed = try std.fmt.allocPrint(ctx.gpa, "{s}/{s}", .{ parent, sub_name });
defer ctx.gpa.free(composed);
// Sub-bench filter: match against the composed name so users can target
// `parent/leaf` or filter by substring.
if (ctx.opts.filter) |needle| {
if (std.mem.indexOf(u8, composed, needle) == null) return;
}
try run_named(ctx, composed, f);
}
fn open_stderr(io: Io, buf: []u8) std.Io.File.Writer {
return std.Io.File.stderr().writerStreaming(io, buf);
}
/// Match a `--name=value` style argument. Returns the value slice if the
/// argument's prefix is `name=`, otherwise null.
fn match_flag(arg: []const u8, comptime name: []const u8) ?[]const u8 {
const prefix = name ++ "=";
if (!std.mem.startsWith(u8, arg, prefix)) return null;
return arg[prefix.len..];
}
fn print_help(w: *Writer) !void {
try w.writeAll(
\\zbench — benchmark runner
\\
@@ -260,3 +280,10 @@ test "parse_duration_ns" {
try std.testing.expectError(error.InvalidDuration, parse_duration_ns(""));
try std.testing.expectError(error.InvalidDuration, parse_duration_ns("10xyz"));
}
test "match_flag" {
try std.testing.expectEqualStrings("foo", match_flag("--filter=foo", "--filter").?);
try std.testing.expectEqualStrings("", match_flag("--filter=", "--filter").?);
try std.testing.expectEqual(@as(?[]const u8, null), match_flag("--other=x", "--filter"));
try std.testing.expectEqual(@as(?[]const u8, null), match_flag("--filter", "--filter"));
}