From cdbdb9b351d6215eb48c84c550749bf9bd1bf503 Mon Sep 17 00:00:00 2001 From: Aleksey Shakhmatov Date: Thu, 21 May 2026 08:33:24 +0300 Subject: [PATCH] refactor: simplify after code review Aggregated fixes from a three-axis review (reuse, quality, efficiency): - benchmark.zig: store raw bytes_per_op on Benchmark instead of pre- multiplying by b.n inside set_bytes. Reset no longer clobbers it, so the call order no longer matters and the math moves to result finalization where n is final. - runner.zig: drop the runner-initiated b.reset_timer (was duplicative with the user's own reset_timer at the top of every bench). Pack counter/sub_run/sub_run_ctx/io into a runner.Env so run_one's signature stays at four params instead of seven. - reporter.zig: remove the [256]f64 stack buffer that silently truncated --count=500. Reporters now take a pre-computed Summary and raw ns_samples slice from the suite. Replace the hand-rolled JSON string escape with std.json.Stringify.encodeJsonString. - suite.zig: replace four nullable active_* fields with an explicit RunCtx threaded through run_named and the sub_run trampoline. Allocate ns_samples once per benchmark group. Extract match_flag for the eight identical --foo= prefix checks and open_stderr for the duplicated stderr-writer dance. `--count=500` now produces 500 JSON samples (was 256 silently). No behavior change for users at the API level beyond the set_bytes ordering footgun being removed. --- src/benchmark.zig | 13 ++- src/reporter.zig | 64 ++++--------- src/runner.zig | 86 ++++++++--------- src/suite.zig | 237 ++++++++++++++++++++++++++-------------------- 4 files changed, 198 insertions(+), 202 deletions(-) diff --git a/src/benchmark.zig b/src/benchmark.zig index d9747b6..5eb3e80 100644 --- a/src/benchmark.zig +++ b/src/benchmark.zig @@ -34,7 +34,9 @@ pub const Benchmark = struct { accumulated_ns: i128 = 0, timer_running: bool = true, start_ts: Io.Timestamp = Io.Timestamp.zero, - bytes_processed: u64 = 0, + /// Bytes processed per iteration, as declared by `set_bytes`. The + /// reporter multiplies by `n` to derive total throughput in MB/s. + bytes_per_op: u64 = 0, /// If the user calls `b.run`, this Benchmark is just a container — the /// outer runner will skip reporting its own result. is_container: bool = false, @@ -43,10 +45,10 @@ pub const Benchmark = struct { /// Start (or restart) the timer fresh: zero accumulated time, zero /// allocation counters. Call after setup and before the measured loop. + /// `set_bytes` survives `reset_timer` — declare it at any point. pub fn reset_timer(b: *Benchmark) void { b.accumulated_ns = 0; b.counter.reset(); - b.bytes_processed = 0; b.timer_running = true; b.start_ts = Io.Timestamp.now(b.io, .awake); } @@ -67,10 +69,11 @@ pub const Benchmark = struct { b.timer_running = true; } - /// Record bytes processed by this iteration; the reporter divides by - /// `ns/op` to print MB/s. + /// Record bytes processed per iteration; the reporter multiplies by + /// `n` and divides by elapsed time to print MB/s. Safe to call before + /// or after `reset_timer`. pub fn set_bytes(b: *Benchmark, bytes_per_op: u64) void { - b.bytes_processed = bytes_per_op *| b.n; + b.bytes_per_op = bytes_per_op; } /// Mark this benchmark so allocations columns are always printed, diff --git a/src/reporter.zig b/src/reporter.zig index dddd9b1..ea05b54 100644 --- a/src/reporter.zig +++ b/src/reporter.zig @@ -18,31 +18,25 @@ pub fn write_text_header(w: *Writer) !void { }); } -/// Print one benchmark result group (>= 1 attempt) as a text row. +/// Render one benchmark group as a text row. `summary` summarizes the +/// `ns_per_op` distribution across all attempts in the group; `last` +/// supplies metadata (n, B/op, MB/s, …) that is stable across attempts. pub fn write_text( w: *Writer, name: []const u8, - runs: []const Result, + last: Result, + summary: stats.Summary, opts: ReportOpts, ) !void { - if (runs.len == 0) return; - if (runs[0].is_container) return; - - var buf: [256]f64 = undefined; - const ns_samples: []f64 = buf[0..@min(runs.len, buf.len)]; - for (ns_samples, runs[0..ns_samples.len]) |*s, r| s.* = r.ns_per_op; - const ns_summary = stats.summarize(ns_samples); - - const last = runs[runs.len - 1]; const show_allocs = opts.always_allocs or last.force_report_allocs or last.bytes_per_op > 0 or last.allocs_per_op > 0; try w.print("{s:<30} {d:>10} ", .{ name, last.n }); - if (runs.len > 1) { - try w.print("{d:>7.2} \xc2\xb1 {d:<2.2} ", .{ ns_summary.mean, ns_summary.stddev }); + if (summary.n > 1) { + try w.print("{d:>7.2} \xc2\xb1 {d:<2.2} ", .{ summary.mean, summary.stddev }); } else { - try w.print("{d:>12.2} ", .{ ns_summary.mean }); + try w.print("{d:>12.2} ", .{summary.mean}); } if (show_allocs) { @@ -63,22 +57,14 @@ pub fn write_text( pub fn write_json( w: *Writer, name: []const u8, - runs: []const Result, + last: Result, + ns_samples: []const f64, + summary: stats.Summary, ) !void { - if (runs.len == 0) return; - if (runs[0].is_container) return; - - var buf: [256]f64 = undefined; - const ns_samples: []f64 = buf[0..@min(runs.len, buf.len)]; - for (ns_samples, runs[0..ns_samples.len]) |*s, r| s.* = r.ns_per_op; - const s = stats.summarize(ns_samples); - - const last = runs[runs.len - 1]; - - try w.writeAll("{"); - try write_json_string_field(w, "name", name); + try w.writeAll("{\"name\":"); + try std.json.Stringify.encodeJsonString(name, .{}, w); try w.print(",\"n\":{d}", .{last.n}); - try w.print(",\"ns_per_op\":{d}", .{s.mean}); + try w.print(",\"ns_per_op\":{d}", .{summary.mean}); try w.print(",\"bytes_per_op\":{d}", .{last.bytes_per_op}); try w.print(",\"allocs_per_op\":{d}", .{last.allocs_per_op}); @@ -88,12 +74,12 @@ pub fn write_json( try w.writeAll(",\"mb_per_sec\":null"); } - try w.print(",\"count\":{d}", .{runs.len}); + try w.print(",\"count\":{d}", .{summary.n}); - if (runs.len > 1) { + if (summary.n > 1) { try w.print( ",\"ns_per_op_mean\":{d},\"ns_per_op_stddev\":{d},\"ns_per_op_min\":{d}", - .{ s.mean, s.stddev, s.min }, + .{ summary.mean, summary.stddev, summary.min }, ); try w.writeAll(",\"samples\":["); for (ns_samples, 0..) |x, i| { @@ -105,19 +91,3 @@ pub fn write_json( try w.writeAll("}\n"); } - -fn write_json_string_field(w: *Writer, key: []const u8, value: []const u8) !void { - try w.print("\"{s}\":\"", .{key}); - for (value) |c| { - switch (c) { - '"' => try w.writeAll("\\\""), - '\\' => try w.writeAll("\\\\"), - '\n' => try w.writeAll("\\n"), - '\r' => try w.writeAll("\\r"), - '\t' => try w.writeAll("\\t"), - 0x00...0x08, 0x0b, 0x0c, 0x0e...0x1f => try w.print("\\u{x:0>4}", .{c}), - else => try w.print("{c}", .{c}), - } - } - try w.writeAll("\""); -} diff --git a/src/runner.zig b/src/runner.zig index 0123d54..17d5dfe 100644 --- a/src/runner.zig +++ b/src/runner.zig @@ -28,80 +28,78 @@ pub const Options = struct { max_iters: u64 = 1_000_000_000, }; -/// Adaptive single-run: grow `n` until `elapsed >= min_time_ns` or -/// `n >= max_iters`. Returns the final `Result`. -pub fn run_one( - name: []const u8, - f: BenchFn, +/// Per-suite collaborators threaded into every `run_one` call. +pub const Env = struct { counter: *CountingAllocator, sub_run: SubRunFn, sub_run_ctx: *anyopaque, io: std.Io, - opts: Options, -) !Result { +}; + +/// Adaptive single-run: grow `n` until `elapsed >= min_time_ns` or +/// `n >= max_iters`. Returns the final `Result`. +pub fn run_one(name: []const u8, f: BenchFn, env: Env, opts: Options) !Result { var n: u64 = 1; - var last_elapsed_ns: u64 = 0; - var last_alloc_bytes: u64 = 0; - var last_alloc_count: u64 = 0; - var last_bytes_processed: u64 = 0; - var last_is_container: bool = false; - var last_force_report: bool = false; + var elapsed_ns: u64 = 0; + var alloc_bytes: u64 = 0; + var alloc_count: u64 = 0; + var bytes_per_op: u64 = 0; + var is_container = false; + var force_report = false; while (true) { + env.counter.reset(); var b: Benchmark = .{ .n = n, - .allocator = counter.allocator(), - .io = io, + .allocator = env.counter.allocator(), + .io = env.io, .name = name, - .counter = counter, - .sub_run = sub_run, - .sub_run_ctx = sub_run_ctx, + .counter = env.counter, + .sub_run = env.sub_run, + .sub_run_ctx = env.sub_run_ctx, + .start_ts = std.Io.Timestamp.now(env.io, .awake), }; - counter.reset(); - b.reset_timer(); try f(&b); - b.finish(); - const elapsed_i: i128 = if (b.accumulated_ns < 0) 0 else b.accumulated_ns; - last_elapsed_ns = @intCast(@min(elapsed_i, std.math.maxInt(u64))); - last_alloc_bytes = counter.bytes_allocated; - last_alloc_count = counter.allocs; - last_bytes_processed = b.bytes_processed; - last_is_container = b.is_container; - last_force_report = b.force_report_allocs; + elapsed_ns = if (b.accumulated_ns <= 0) 0 else @intCast(@min(b.accumulated_ns, std.math.maxInt(u64))); + alloc_bytes = env.counter.bytes_allocated; + alloc_count = env.counter.allocs; + bytes_per_op = b.bytes_per_op; + is_container = b.is_container; + force_report = b.force_report_allocs; - if (last_is_container) break; - if (last_elapsed_ns >= opts.min_time_ns) break; + if (is_container) break; + if (elapsed_ns >= opts.min_time_ns) break; if (n >= opts.max_iters) break; - n = next_n(n, last_elapsed_ns, opts.min_time_ns, opts.max_iters); + n = next_n(n, elapsed_ns, opts.min_time_ns, opts.max_iters); } const fn_n: f64 = @floatFromInt(n); - const ns_per_op: f64 = if (n == 0) 0 else @as(f64, @floatFromInt(last_elapsed_ns)) / fn_n; - const bytes_per_op: f64 = if (n == 0) 0 else @as(f64, @floatFromInt(last_alloc_bytes)) / fn_n; - const allocs_per_op: f64 = if (n == 0) 0 else @as(f64, @floatFromInt(last_alloc_count)) / fn_n; + const ns_per_op_v: f64 = if (n == 0) 0 else @as(f64, @floatFromInt(elapsed_ns)) / fn_n; + const bytes_alloc_per_op: f64 = if (n == 0) 0 else @as(f64, @floatFromInt(alloc_bytes)) / fn_n; + const allocs_per_op: f64 = if (n == 0) 0 else @as(f64, @floatFromInt(alloc_count)) / fn_n; - const mb_per_sec: ?f64 = if (last_bytes_processed == 0 or last_elapsed_ns == 0) + const mb_per_sec: ?f64 = if (bytes_per_op == 0 or elapsed_ns == 0) null else blk: { - const bytes_f: f64 = @floatFromInt(last_bytes_processed); - const elapsed_s: f64 = @as(f64, @floatFromInt(last_elapsed_ns)) / @as(f64, std.time.ns_per_s); - break :blk (bytes_f / (1024.0 * 1024.0)) / elapsed_s; + const total_bytes: f64 = @floatFromInt(bytes_per_op *| n); + const elapsed_s: f64 = @as(f64, @floatFromInt(elapsed_ns)) / @as(f64, std.time.ns_per_s); + break :blk (total_bytes / (1024.0 * 1024.0)) / elapsed_s; }; return .{ .name = name, .n = n, - .elapsed_ns = last_elapsed_ns, - .ns_per_op = ns_per_op, - .bytes_per_op = bytes_per_op, + .elapsed_ns = elapsed_ns, + .ns_per_op = ns_per_op_v, + .bytes_per_op = bytes_alloc_per_op, .allocs_per_op = allocs_per_op, .mb_per_sec = mb_per_sec, - .force_report_allocs = last_force_report, - .is_container = last_is_container, + .force_report_allocs = force_report, + .is_container = is_container, }; } @@ -154,10 +152,8 @@ test "round_up snaps to nice numbers" { } test "next_n grows toward target" { - // first run: 0 ns -> jump by 100x try std.testing.expectEqual(@as(u64, 100), next_n(1, 0, std.time.ns_per_s, 1 << 30)); - // 100 iters in 1 ms; target 1s -> predicted = 1.2e6 * 100 / 1e6 = 120000, rounded 200000 const n2 = next_n(100, std.time.ns_per_ms, std.time.ns_per_s, 1 << 30); try std.testing.expect(n2 > 100); try std.testing.expect(n2 <= 100 * 100); diff --git a/src/suite.zig b/src/suite.zig index f14406a..dd582f0 100644 --- a/src/suite.zig +++ b/src/suite.zig @@ -6,6 +6,7 @@ const Writer = std.Io.Writer; const bench = @import("benchmark.zig"); const runner = @import("runner.zig"); const reporter = @import("reporter.zig"); +const stats = @import("stats.zig"); const CountingAllocator = @import("alloc.zig").CountingAllocator; const Benchmark = bench.Benchmark; @@ -33,14 +34,6 @@ pub const Suite = struct { io: Io, entries: std.ArrayListUnmanaged(Entry) = .empty, - /// Mutable state during a run — installed by `run`. - active_writer: ?*Writer = null, - active_opts: Options = .{}, - active_counter: ?*CountingAllocator = null, - /// The composed name of the benchmark currently being executed. - /// Used by `sub_run_trampoline` to prefix sub-benchmark names. - current_name: ?[]const u8 = null, - pub fn init(gpa: Allocator, io: Io) Suite { return .{ .gpa = gpa, .io = io }; } @@ -60,121 +53,60 @@ pub const Suite = struct { /// Run all registered benchmarks with `opts`, writing to `writer`. pub fn run(self: *Suite, opts: Options, writer: *Writer) !void { var counter = CountingAllocator.init(self.gpa); - - self.active_writer = writer; - self.active_opts = opts; - self.active_counter = &counter; - defer { - self.active_writer = null; - self.active_counter = null; - self.current_name = null; - } + var ctx: RunCtx = .{ + .gpa = self.gpa, + .io = self.io, + .writer = writer, + .opts = opts, + .counter = &counter, + }; if (opts.format == .text) try reporter.write_text_header(writer); for (self.entries.items) |entry| { if (opts.filter) |needle| { - // Substring match on the top-level name, or on the parent - // segment when the user passes a `parent/leaf` style filter. - const target = if (std.mem.indexOf(u8, needle, "/")) |i| + // For `parent/leaf` style filters, gate the top level on the + // parent segment so we still descend into matching subs. + const target = if (std.mem.indexOfScalar(u8, needle, '/')) |i| needle[0..i] else needle; if (std.mem.indexOf(u8, entry.name, target) == null) continue; } - try self.run_named(entry.name, entry.f); + try run_named(&ctx, entry.name, entry.f); } try writer.flush(); } - /// Run a benchmark by composed name. Used both for top-level entries - /// and sub-benchmarks (`parent/sub`). - fn run_named(self: *Suite, full_name: []const u8, f: BenchFn) !void { - const opts = self.active_opts; - const writer = self.active_writer.?; - const counter = self.active_counter.?; - - const saved_current = self.current_name; - self.current_name = full_name; - defer self.current_name = saved_current; - - const count = @max(@as(u32, 1), opts.count); - const samples = try self.gpa.alloc(Result, count); - defer self.gpa.free(samples); - - var i: u32 = 0; - while (i < count) : (i += 1) { - samples[i] = try runner.run_one( - full_name, - f, - counter, - Suite.sub_run_trampoline, - self, - self.io, - .{ .min_time_ns = opts.min_time_ns, .max_iters = opts.max_iters }, - ); - // If the user delegated to sub-benchmarks, no point repeating. - if (samples[i].is_container) { - i += 1; - break; - } - } - const used = samples[0..i]; - - switch (opts.format) { - .text => try reporter.write_text( - writer, - full_name, - used, - .{ .always_allocs = opts.always_allocs }, - ), - .json => try reporter.write_json(writer, full_name, used), - } - } - - fn sub_run_trampoline(ctx: *anyopaque, sub_name: []const u8, f: BenchFn) anyerror!void { - const self: *Suite = @ptrCast(@alignCast(ctx)); - const parent = self.current_name orelse ""; - const composed = try std.fmt.allocPrint(self.gpa, "{s}/{s}", .{ parent, sub_name }); - defer self.gpa.free(composed); - - // Apply filter against the composed name so users can target a - // specific sub-benchmark with --filter=parent/leaf or by substring. - if (self.active_opts.filter) |needle| { - if (std.mem.indexOf(u8, composed, needle) == null) return; - } - - try self.run_named(composed, f); - } - /// CLI entrypoint: parse `proc_init.minimal.args`, then dispatch to `run`. pub fn run_cli(self: *Suite, proc_init: std.process.Init) !void { - var args_it = std.process.Args.Iterator.init(proc_init.minimal.args); - _ = args_it.skip(); // argv[0] - var opts: Options = .{}; var list_only = false; + var args_it = std.process.Args.Iterator.init(proc_init.minimal.args); + _ = args_it.skip(); // argv[0] + while (args_it.next()) |raw| { const arg: []const u8 = raw; if (std.mem.eql(u8, arg, "--help") or std.mem.eql(u8, arg, "-h")) { - try print_help(proc_init); + var buf: [2048]u8 = undefined; + var w = open_stderr(proc_init.io, &buf); + try print_help(&w.interface); return; } else if (std.mem.eql(u8, arg, "--list")) { list_only = true; } else if (std.mem.eql(u8, arg, "--allocs")) { opts.always_allocs = true; - } else if (std.mem.startsWith(u8, arg, "--filter=")) { - opts.filter = arg["--filter=".len..]; - } else if (std.mem.startsWith(u8, arg, "--min-time=")) { - opts.min_time_ns = try parse_duration_ns(arg["--min-time=".len..]); - } else if (std.mem.startsWith(u8, arg, "--count=")) { - opts.count = try std.fmt.parseInt(u32, arg["--count=".len..], 10); - } else if (std.mem.startsWith(u8, arg, "--max-iters=")) { - opts.max_iters = try std.fmt.parseInt(u64, arg["--max-iters=".len..], 10); - } else if (std.mem.startsWith(u8, arg, "--format=")) { - const v = arg["--format=".len..]; + } else if (match_flag(arg, "--filter")) |v| { + opts.filter = v; + } else if (match_flag(arg, "--min-time")) |v| { + opts.min_time_ns = try parse_duration_ns(v); + } else if (match_flag(arg, "--count")) |v| { + opts.count = try std.fmt.parseInt(u32, v, 10); + } else if (match_flag(arg, "--max-iters")) |v| { + opts.max_iters = try std.fmt.parseInt(u64, v, 10); + } else if (match_flag(arg, "--format")) |v| { if (std.mem.eql(u8, v, "text")) { opts.format = .text; } else if (std.mem.eql(u8, v, "json")) { @@ -186,24 +118,112 @@ pub const Suite = struct { } var buf: [4096]u8 = undefined; - var stderr_file = std.Io.File.stderr().writerStreaming(proc_init.io, &buf); + var w = open_stderr(proc_init.io, &buf); if (list_only) { - for (self.entries.items) |e| { - try stderr_file.interface.print("{s}\n", .{e.name}); - } - try stderr_file.interface.flush(); + for (self.entries.items) |e| try w.interface.print("{s}\n", .{e.name}); + try w.interface.flush(); return; } - try self.run(opts, &stderr_file.interface); + try self.run(opts, &w.interface); } }; -fn print_help(proc_init: std.process.Init) !void { - var buf: [2048]u8 = undefined; - var stderr_file = std.Io.File.stderr().writerStreaming(proc_init.io, &buf); - const w = &stderr_file.interface; +/// Per-run state threaded through `run_named` and the sub-benchmark +/// trampoline. Avoids stashing mutable globals on `Suite`. +const RunCtx = struct { + gpa: Allocator, + io: Io, + writer: *Writer, + opts: Options, + counter: *CountingAllocator, + /// Composed name of the benchmark currently executing — used as the + /// prefix when its `f` invokes `b.run(...)`. + parent_name: ?[]const u8 = null, +}; + +/// Run a benchmark by composed name. Used both for top-level entries and +/// sub-benchmarks (parent/sub). +fn run_named(ctx: *RunCtx, full_name: []const u8, f: BenchFn) !void { + const saved = ctx.parent_name; + ctx.parent_name = full_name; + defer ctx.parent_name = saved; + + const count = @max(@as(u32, 1), ctx.opts.count); + const samples = try ctx.gpa.alloc(Result, count); + defer ctx.gpa.free(samples); + + const env: runner.Env = .{ + .counter = ctx.counter, + .sub_run = sub_run_trampoline, + .sub_run_ctx = ctx, + .io = ctx.io, + }; + const r_opts: runner.Options = .{ + .min_time_ns = ctx.opts.min_time_ns, + .max_iters = ctx.opts.max_iters, + }; + + var i: u32 = 0; + while (i < count) : (i += 1) { + samples[i] = try runner.run_one(full_name, f, env, r_opts); + // Containers delegate to sub-benchmarks; their own row is meaningless + // and there's nothing to repeat across `--count`. + if (samples[i].is_container) { + i += 1; + break; + } + } + const used = samples[0..i]; + if (used.len == 0 or used[used.len - 1].is_container) return; + + const ns_samples = try ctx.gpa.alloc(f64, used.len); + defer ctx.gpa.free(ns_samples); + for (ns_samples, used) |*s, r| s.* = r.ns_per_op; + + const summary = stats.summarize(ns_samples); + const last = used[used.len - 1]; + + switch (ctx.opts.format) { + .text => try reporter.write_text( + ctx.writer, + full_name, + last, + summary, + .{ .always_allocs = ctx.opts.always_allocs }, + ), + .json => try reporter.write_json(ctx.writer, full_name, last, ns_samples, summary), + } +} + +fn sub_run_trampoline(opaque_ctx: *anyopaque, sub_name: []const u8, f: BenchFn) anyerror!void { + const ctx: *RunCtx = @ptrCast(@alignCast(opaque_ctx)); + const parent = ctx.parent_name orelse ""; + const composed = try std.fmt.allocPrint(ctx.gpa, "{s}/{s}", .{ parent, sub_name }); + defer ctx.gpa.free(composed); + + // Sub-bench filter: match against the composed name so users can target + // `parent/leaf` or filter by substring. + if (ctx.opts.filter) |needle| { + if (std.mem.indexOf(u8, composed, needle) == null) return; + } + try run_named(ctx, composed, f); +} + +fn open_stderr(io: Io, buf: []u8) std.Io.File.Writer { + return std.Io.File.stderr().writerStreaming(io, buf); +} + +/// Match a `--name=value` style argument. Returns the value slice if the +/// argument's prefix is `name=`, otherwise null. +fn match_flag(arg: []const u8, comptime name: []const u8) ?[]const u8 { + const prefix = name ++ "="; + if (!std.mem.startsWith(u8, arg, prefix)) return null; + return arg[prefix.len..]; +} + +fn print_help(w: *Writer) !void { try w.writeAll( \\zbench — benchmark runner \\ @@ -260,3 +280,10 @@ test "parse_duration_ns" { try std.testing.expectError(error.InvalidDuration, parse_duration_ns("")); try std.testing.expectError(error.InvalidDuration, parse_duration_ns("10xyz")); } + +test "match_flag" { + try std.testing.expectEqualStrings("foo", match_flag("--filter=foo", "--filter").?); + try std.testing.expectEqualStrings("", match_flag("--filter=", "--filter").?); + try std.testing.expectEqual(@as(?[]const u8, null), match_flag("--other=x", "--filter")); + try std.testing.expectEqual(@as(?[]const u8, null), match_flag("--filter", "--filter")); +}