feat: core runner — counting allocator, stats, Benchmark, adaptive N

Adds the measurement core:

- CountingAllocator: a thin wrapper over std.mem.Allocator vtable that
  tracks alloc/free counts and total bytes allocated, exposing a reset()
  used between attempts of the adaptive loop.
- stats.summarize: min / mean / sample stddev over an array of f64,
  used to aggregate --count repetitions.
- Benchmark: user-facing handle passed to each bench fn — n,
  allocator (counting-wrapped), io, with snake_case methods
  reset_timer / stop_timer / start_timer / set_bytes / report_allocs /
  keep / run (sub-bench). Times via std.Io.Timestamp.now(io, .awake).
- runner.run_one: galloping iteration count toward --min-time, with
  ×100 growth cap per step and "nice-number" rounding, matching Go's
  testing pkg heuristic.
This commit is contained in:
2026-05-21 08:12:56 +03:00
parent be72707042
commit e28eb3c22e
4 changed files with 422 additions and 0 deletions

94
src/alloc.zig Normal file
View File

@@ -0,0 +1,94 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
const Alignment = std.mem.Alignment;
/// Allocator wrapper that forwards every request to `inner` while keeping
/// running totals of successful allocations, frees, and bytes handed out.
///
/// Obtain the `std.mem.Allocator` view with `allocator()`; the returned
/// value points back at this struct, so the struct must stay at a stable
/// address for as long as that allocator is in use.
pub const CountingAllocator = struct {
    /// The allocator that actually services every request.
    inner: Allocator,
    /// Bytes requested by successful `alloc` calls plus any growth from
    /// successful `resize` / `remap` calls. Shrinks and frees do not
    /// subtract from it.
    bytes_allocated: u64 = 0,
    /// Number of successful `alloc` calls.
    allocs: u64 = 0,
    /// Number of `free` calls.
    frees: u64 = 0,

    /// Dispatch table shared by every allocator produced by `allocator()`.
    const vtable: Allocator.VTable = .{
        .alloc = alloc,
        .resize = resize,
        .remap = remap,
        .free = free,
    };

    /// Wrap `inner` with all counters at zero.
    pub fn init(inner: Allocator) CountingAllocator {
        return .{ .inner = inner };
    }

    /// Zero every counter while keeping the same backing allocator.
    pub fn reset(self: *CountingAllocator) void {
        const wrapped = self.inner;
        self.* = .{ .inner = wrapped };
    }

    /// Return the counting `std.mem.Allocator` interface for this wrapper.
    pub fn allocator(self: *CountingAllocator) Allocator {
        return .{ .ptr = self, .vtable = &vtable };
    }

    /// Recover the wrapper from the opaque context pointer of the vtable.
    fn from_ctx(ctx: *anyopaque) *CountingAllocator {
        return @ptrCast(@alignCast(ctx));
    }

    /// Add the size increase to `bytes_allocated`; shrinking adds nothing.
    fn note_growth(self: *CountingAllocator, old_len: usize, new_len: usize) void {
        if (new_len > old_len) self.bytes_allocated += new_len - old_len;
    }

    fn alloc(ctx: *anyopaque, len: usize, alignment: Alignment, ret_addr: usize) ?[*]u8 {
        const self = from_ctx(ctx);
        // A failed allocation is passed through untouched and not counted.
        const ptr = self.inner.vtable.alloc(self.inner.ptr, len, alignment, ret_addr) orelse
            return null;
        self.allocs += 1;
        self.bytes_allocated += len;
        return ptr;
    }

    fn resize(ctx: *anyopaque, memory: []u8, alignment: Alignment, new_len: usize, ret_addr: usize) bool {
        const self = from_ctx(ctx);
        if (!self.inner.vtable.resize(self.inner.ptr, memory, alignment, new_len, ret_addr)) {
            return false;
        }
        self.note_growth(memory.len, new_len);
        return true;
    }

    fn remap(ctx: *anyopaque, memory: []u8, alignment: Alignment, new_len: usize, ret_addr: usize) ?[*]u8 {
        const self = from_ctx(ctx);
        const moved = self.inner.vtable.remap(self.inner.ptr, memory, alignment, new_len, ret_addr) orelse
            return null;
        self.note_growth(memory.len, new_len);
        return moved;
    }

    fn free(ctx: *anyopaque, memory: []u8, alignment: Alignment, ret_addr: usize) void {
        const self = from_ctx(ctx);
        self.inner.vtable.free(self.inner.ptr, memory, alignment, ret_addr);
        self.frees += 1;
    }
};
test "CountingAllocator counts allocations" {
    const testing = std.testing;
    var counting = CountingAllocator.init(testing.allocator);
    const gpa = counting.allocator();

    // Two successful allocations: 64 + 128 = 192 bytes in total.
    const small = try gpa.alloc(u8, 64);
    const large = try gpa.alloc(u8, 128);
    try testing.expectEqual(@as(u64, 2), counting.allocs);
    try testing.expectEqual(@as(u64, 192), counting.bytes_allocated);

    // Each free is tallied separately.
    gpa.free(small);
    gpa.free(large);
    try testing.expectEqual(@as(u64, 2), counting.frees);
}
test "CountingAllocator reset clears counters" {
    const testing = std.testing;
    var counting = CountingAllocator.init(testing.allocator);
    const gpa = counting.allocator();

    const scratch = try gpa.alloc(u8, 32);
    defer gpa.free(scratch);
    try testing.expect(counting.allocs > 0);

    // After reset() every counter must read zero again.
    counting.reset();
    const zero: u64 = 0;
    try testing.expectEqual(zero, counting.allocs);
    try testing.expectEqual(zero, counting.bytes_allocated);
    try testing.expectEqual(zero, counting.frees);
}

107
src/benchmark.zig Normal file
View File

@@ -0,0 +1,107 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
const Io = std.Io;
const CountingAllocator = @import("alloc.zig").CountingAllocator;
/// Signature of a user benchmark function: it receives the per-attempt
/// `Benchmark` handle and may fail with any error.
pub const BenchFn = *const fn (b: *Benchmark) anyerror!void;
/// Callback installed by the Suite so `b.run(name, f)` can start a fresh
/// adaptive run for a sub-benchmark. Keeps benchmark.zig free of a
/// back-import on suite.zig.
///
/// `ctx` is the opaque pointer stored in `Benchmark.sub_run_ctx`,
/// `sub_name` is the name passed to `Benchmark.run`, and `f` is the
/// sub-benchmark body.
pub const SubRunFn = *const fn (
ctx: *anyopaque,
sub_name: []const u8,
f: BenchFn,
) anyerror!void;
/// State for one invocation of a user benchmark function. The runner sets
/// `n` for each attempt; the user reads it inside the hot loop.
pub const Benchmark = struct {
    /// Target iteration count for the current attempt. Set by the runner.
    n: u64,
    /// Counting-wrapped allocator. Use it inside the bench to get accurate
    /// B/op and allocs/op.
    allocator: Allocator,
    /// I/O capability — passed through so user benchmarks can do I/O if
    /// they need to. Also used as the clock source for the timer.
    io: Io,
    /// Hierarchical name used when reporting sub-benchmark results.
    name: []const u8,
    /// Counter whose totals are zeroed by `reset_timer`.
    counter: *CountingAllocator,
    /// Callback invoked by `run` to execute a sub-benchmark.
    sub_run: SubRunFn,
    /// Opaque context handed back to `sub_run` unchanged.
    sub_run_ctx: *anyopaque,
    /// Nanoseconds measured so far, excluding any interval still running.
    accumulated_ns: i128 = 0,
    /// True while an interval started at `start_ts` is being measured.
    timer_running: bool = true,
    /// Timestamp at which the currently running interval began.
    start_ts: Io.Timestamp = Io.Timestamp.zero,
    /// Total bytes processed in this attempt, as recorded by `set_bytes`.
    bytes_processed: u64 = 0,
    /// If the user calls `b.run`, this Benchmark is just a container — the
    /// outer runner will skip reporting its own result.
    is_container: bool = false,
    /// Forces the reporter to print B/op and allocs/op even when zero.
    force_report_allocs: bool = false,

    /// Start (or restart) the timer fresh: zero accumulated time, zero
    /// allocation counters. Call after setup and before the measured loop.
    ///
    /// The value recorded by `set_bytes` is deliberately kept, so calling
    /// `set_bytes` during setup and `reset_timer` afterwards still yields
    /// a throughput figure.
    pub fn reset_timer(b: *Benchmark) void {
        b.accumulated_ns = 0;
        b.counter.reset();
        b.timer_running = true;
        b.start_ts = Io.Timestamp.now(b.io, .awake);
    }

    /// Pause timing. Pair with `start_timer` for per-iteration setup that
    /// should not be measured. Does nothing if the timer is already
    /// stopped.
    ///
    /// Only the clock is paused: allocations made through `allocator`
    /// while the timer is stopped are still counted.
    pub fn stop_timer(b: *Benchmark) void {
        if (!b.timer_running) return;
        const now = Io.Timestamp.now(b.io, .awake);
        b.accumulated_ns += @as(i128, b.start_ts.durationTo(now).nanoseconds);
        b.timer_running = false;
    }

    /// Resume timing after `stop_timer`. Does nothing if the timer is
    /// already running.
    pub fn start_timer(b: *Benchmark) void {
        if (b.timer_running) return;
        b.start_ts = Io.Timestamp.now(b.io, .awake);
        b.timer_running = true;
    }

    /// Record the number of bytes processed by one iteration. The total
    /// for the attempt is stored as `bytes_per_op * n` (saturating) and is
    /// used by the runner to derive throughput.
    pub fn set_bytes(b: *Benchmark, bytes_per_op: u64) void {
        b.bytes_processed = bytes_per_op *| b.n;
    }

    /// Mark this benchmark so its allocation columns are always printed,
    /// even when they are zero.
    pub fn report_allocs(b: *Benchmark) void {
        b.force_report_allocs = true;
    }

    /// Optimization barrier — discourages the compiler from eliminating the
    /// computation that produced `value`. Use to keep a result alive past
    /// the loop body.
    pub fn keep(b: *Benchmark, value: anytype) void {
        _ = b;
        std.mem.doNotOptimizeAway(value);
    }

    /// Run a sub-benchmark. The current benchmark becomes a container and
    /// its own result is not reported; the sub-benchmark is reported as
    /// `parent/sub_name`. Any error returned by the sub-run is propagated.
    pub fn run(b: *Benchmark, sub_name: []const u8, f: BenchFn) !void {
        // The parent's clock must not keep running while the child runs.
        b.stop_timer();
        b.is_container = true;
        try b.sub_run(b.sub_run_ctx, sub_name, f);
    }

    /// Internal: finalize accumulated time at end of one attempt. Folds a
    /// still-running interval into `accumulated_ns` and leaves the timer
    /// stopped.
    pub fn finish(b: *Benchmark) void {
        b.stop_timer();
    }
};

164
src/runner.zig Normal file
View File

@@ -0,0 +1,164 @@
const std = @import("std");
const bench = @import("benchmark.zig");
const stats = @import("stats.zig");
const CountingAllocator = @import("alloc.zig").CountingAllocator;
const Benchmark = bench.Benchmark;
const BenchFn = bench.BenchFn;
const SubRunFn = bench.SubRunFn;
/// Outcome of one adaptive run, as produced by `run_one`.
pub const Result = struct {
/// Benchmark name, exactly as passed to `run_one`.
name: []const u8,
/// Iteration count of the final attempt.
n: u64,
/// Measured time of the final attempt, in nanoseconds.
elapsed_ns: u64,
/// `elapsed_ns` divided by `n`.
ns_per_op: f64,
/// Bytes counted by the allocator during the final attempt, divided by `n`.
bytes_per_op: f64,
/// Successful allocations during the final attempt, divided by `n`.
allocs_per_op: f64,
/// Throughput in units of 1024*1024 bytes per second. Null when no bytes
/// were recorded via `set_bytes` or when the measured time was zero.
mb_per_sec: ?f64,
/// Whether the user explicitly asked for allocation columns.
force_report_allocs: bool,
/// If true, this benchmark only ran sub-benchmarks and should not be
/// reported as its own row.
is_container: bool,
};
/// Tuning knobs for `run_one`.
pub const Options = struct {
/// The adaptive loop stops once an attempt's measured time reaches this
/// many nanoseconds. Defaults to one second.
min_time_ns: u64 = std.time.ns_per_s,
/// The adaptive loop stops once the iteration count reaches this value.
max_iters: u64 = 1_000_000_000,
};
/// Adaptive single run of the benchmark `f`.
///
/// Starting at one iteration, `f` is invoked with a fresh `Benchmark` for
/// each attempt and the iteration count is grown via `next_n` until one of:
/// the benchmark turned itself into a container (it called `b.run`), the
/// measured time reached `opts.min_time_ns`, or the iteration count reached
/// `opts.max_iters`. The returned `Result` describes the last attempt only.
///
/// Any error returned by `f` aborts the run and is propagated.
pub fn run_one(
    name: []const u8,
    f: BenchFn,
    counter: *CountingAllocator,
    sub_run: SubRunFn,
    sub_run_ctx: *anyopaque,
    io: std.Io,
    opts: Options,
) !Result {
    // Everything we need to remember about the most recent attempt.
    const Attempt = struct {
        elapsed_ns: u64 = 0,
        alloc_bytes: u64 = 0,
        alloc_count: u64 = 0,
        bytes_processed: u64 = 0,
        is_container: bool = false,
        force_report: bool = false,
    };

    // Average of `total` over `count` iterations; zero iterations yields 0.
    const per_op = struct {
        fn of(total: u64, count: u64) f64 {
            if (count == 0) return 0;
            return @as(f64, @floatFromInt(total)) / @as(f64, @floatFromInt(count));
        }
    }.of;

    var iters: u64 = 1;
    var last: Attempt = .{};

    while (true) {
        var b: Benchmark = .{
            .name = name,
            .n = iters,
            .io = io,
            .counter = counter,
            .allocator = counter.allocator(),
            .sub_run = sub_run,
            .sub_run_ctx = sub_run_ctx,
        };
        counter.reset();
        b.reset_timer();
        try f(&b);
        b.finish();

        last = .{
            // Saturating conversion: negative time becomes 0, anything
            // beyond the u64 range becomes maxInt(u64).
            .elapsed_ns = std.math.lossyCast(u64, b.accumulated_ns),
            .alloc_bytes = counter.bytes_allocated,
            .alloc_count = counter.allocs,
            .bytes_processed = b.bytes_processed,
            .is_container = b.is_container,
            .force_report = b.force_report_allocs,
        };

        const done = last.is_container or
            last.elapsed_ns >= opts.min_time_ns or
            iters >= opts.max_iters;
        if (done) break;

        iters = next_n(iters, last.elapsed_ns, opts.min_time_ns, opts.max_iters);
    }

    // Throughput is only meaningful when bytes were recorded and some time
    // was actually measured.
    const throughput: ?f64 = if (last.bytes_processed != 0 and last.elapsed_ns != 0) blk: {
        const total_bytes: f64 = @floatFromInt(last.bytes_processed);
        const seconds: f64 = @as(f64, @floatFromInt(last.elapsed_ns)) / @as(f64, std.time.ns_per_s);
        break :blk (total_bytes / (1024.0 * 1024.0)) / seconds;
    } else null;

    return .{
        .name = name,
        .n = iters,
        .elapsed_ns = last.elapsed_ns,
        .ns_per_op = per_op(last.elapsed_ns, iters),
        .bytes_per_op = per_op(last.alloc_bytes, iters),
        .allocs_per_op = per_op(last.alloc_count, iters),
        .mb_per_sec = throughput,
        .force_report_allocs = last.force_report,
        .is_container = last.is_container,
    };
}
/// Pick the next iteration count.
///
/// Strategy: predict an `n` that should land at `min_time_ns` based on the
/// last sample, overshoot by 20 %, force growth of at least one iteration,
/// clamp to at most ×100 growth and at most `max_iters`, then round up to a
/// "nice" number. Because rounding can push the value past `max_iters`
/// again, the rounded value is clamped to `max_iters` a second time; the
/// result therefore never exceeds `max_iters`.
///
/// If the previous attempt measured zero nanoseconds there is nothing to
/// extrapolate from, so the full ×100 step is taken.
///
/// All arithmetic saturates instead of overflowing.
fn next_n(prev_n: u64, prev_elapsed_ns: u64, min_time_ns: u64, max_iters: u64) u64 {
    const growth_cap = prev_n *| 100;

    const predicted: u64 = if (prev_elapsed_ns == 0)
        growth_cap
    else blk: {
        // (min_time_ns * 1.2) * prev_n / prev_elapsed_ns, in integer math.
        const num = @as(u128, min_time_ns) *| 12 *| prev_n;
        const denom: u128 = @as(u128, prev_elapsed_ns) *| 10;
        break :blk std.math.cast(u64, num / denom) orelse std.math.maxInt(u64);
    };

    // Always make progress, but never by more than the growth cap and never
    // beyond the configured maximum.
    const grown = @max(predicted, prev_n +| 1);
    const capped = @min(grown, growth_cap, max_iters);

    // round_up may overshoot max_iters (e.g. 1 << 30 rounds to 2e9).
    return @min(round_up(capped), max_iters);
}

/// Round up to a "nice" decimal number (1, 2, 3, 5, 10, 20, 30, 50, 100, ...).
/// Matches Go testing's growth heuristic — avoids reporting awkward iteration
/// counts like 1357 in favor of 2000.
///
/// Values of 0 and 1 both yield 1. Multiplications saturate, so inputs near
/// the top of the u64 range yield `maxInt(u64)`.
fn round_up(n: u64) u64 {
    if (n <= 1) return 1;

    // Largest power of ten strictly below n; the loop keeps `base < n`.
    var base: u64 = 1;
    while (base *| 10 < n) base *|= 10;

    for ([_]u64{ 2, 3, 5 }) |multiplier| {
        const candidate = multiplier *| base;
        if (n <= candidate) return candidate;
    }
    return 10 *| base;
}
test "round_up snaps to nice numbers" {
    // Each pair is (input, expected rounded value).
    const Case = struct { input: u64, want: u64 };
    const cases = [_]Case{
        .{ .input = 1, .want = 1 },
        .{ .input = 2, .want = 2 },
        .{ .input = 3, .want = 3 },
        .{ .input = 4, .want = 5 },
        .{ .input = 5, .want = 5 },
        .{ .input = 7, .want = 10 },
        .{ .input = 73, .want = 100 },
        .{ .input = 999, .want = 1000 },
        .{ .input = 1234, .want = 2000 },
        .{ .input = 1_000_000, .want = 1_000_000 },
    };
    for (cases) |case| {
        try std.testing.expectEqual(case.want, round_up(case.input));
    }
}
test "next_n grows toward target" {
    // First run measured 0 ns: nothing to extrapolate from, so jump by the
    // full x100 growth step.
    try std.testing.expectEqual(@as(u64, 100), next_n(1, 0, std.time.ns_per_s, 1 << 30));

    // 100 iters took 1 ms with a 1 s target. The raw prediction is
    // 1e9 * 12 * 100 / (1e6 * 10) = 120_000, but growth is capped at
    // 100 * 100 = 10_000, which is already a "nice" number.
    const n2 = next_n(100, std.time.ns_per_ms, std.time.ns_per_s, 1 << 30);
    try std.testing.expectEqual(@as(u64, 10_000), n2);
}

57
src/stats.zig Normal file
View File

@@ -0,0 +1,57 @@
const std = @import("std");
/// Aggregate statistics over a set of samples, as computed by `summarize`.
/// All fields are zero when the input was empty.
pub const Summary = struct {
/// Number of samples summarized.
n: usize,
/// Smallest sample.
min: f64,
/// Arithmetic mean of the samples.
mean: f64,
/// Sample standard deviation (n - 1 denominator); 0 for fewer than two
/// samples.
stddev: f64,
};
/// Compute the minimum, mean and sample standard deviation of `samples`.
///
/// An empty slice yields a `Summary` with every field zero. The standard
/// deviation uses the `n - 1` denominator and is reported as 0 when there
/// is only one sample.
pub fn summarize(samples: []const f64) Summary {
    if (samples.len == 0) return .{ .n = 0, .min = 0, .mean = 0, .stddev = 0 };

    // First pass: running total and smallest value.
    var smallest: f64 = samples[0];
    var total: f64 = 0;
    for (samples) |x| {
        if (x < smallest) smallest = x;
        total += x;
    }
    const average = total / @as(f64, @floatFromInt(samples.len));

    // Second pass: sum of squared deviations from the mean.
    var squared_dev: f64 = 0;
    for (samples) |x| {
        const delta = x - average;
        squared_dev += delta * delta;
    }

    // A single sample has no spread.
    const variance: f64 = if (samples.len <= 1)
        0
    else
        squared_dev / @as(f64, @floatFromInt(samples.len - 1));

    return .{
        .n = samples.len,
        .min = smallest,
        .mean = average,
        .stddev = @sqrt(variance),
    };
}
test "summarize empty" {
    // No samples: the summary reports a count of zero.
    const empty: []const f64 = &.{};
    const summary = summarize(empty);
    try std.testing.expectEqual(@as(usize, 0), summary.n);
}
test "summarize single value" {
    // One sample is its own min and mean, with no spread.
    const expectEqual = std.testing.expectEqual;
    const summary = summarize(&.{42.0});
    try expectEqual(@as(f64, 42), summary.min);
    try expectEqual(@as(f64, 42), summary.mean);
    try expectEqual(@as(f64, 0), summary.stddev);
}
test "summarize multiple values" {
    const testing = std.testing;
    const summary = summarize(&.{ 2, 4, 4, 4, 5, 5, 7, 9 });
    try testing.expectEqual(@as(f64, 2), summary.min);
    try testing.expectEqual(@as(f64, 5), summary.mean);
    // Squared deviations from the mean 5 sum to 32, so the sample stddev
    // is sqrt(32 / 7) ≈ 2.138.
    try testing.expectApproxEqAbs(@as(f64, 2.1380899), summary.stddev, 1e-5);
}