feat: core runner — counting allocator, stats, Benchmark, adaptive N

Adds the measurement core:

- CountingAllocator: a thin wrapper over the std.mem.Allocator vtable that tracks alloc/free counts and total bytes allocated, exposing a reset() used between attempts of the adaptive loop.
- stats.summarize: min / mean / sample stddev over a slice of f64, used to aggregate --count repetitions.
- Benchmark: user-facing handle passed to each bench fn — n, allocator (counting-wrapped), io — with snake_case methods reset_timer / stop_timer / start_timer / set_bytes / report_allocs / keep / run (sub-bench). Times via std.Io.Timestamp.now(io, .awake).
- runner.run_one: galloping iteration count toward --min-time, with a ×100 growth cap per step and "nice-number" rounding, matching Go's testing pkg heuristic.
2026-05-21 08:12:56 +03:00
parent be72707042
commit e28eb3c22e
4 changed files with 422 additions and 0 deletions
--- a/src/alloc.zig
+++ b/src/alloc.zig
@@ -0,0 +1,94 @@
 const std = @import("std");
 const Allocator = std.mem.Allocator;
 const Alignment = std.mem.Alignment;
 /// Allocator wrapper that forwards every request to `inner` while keeping
 /// running totals of allocations, frees and bytes requested.
 pub const CountingAllocator = struct {
    /// Backing allocator that actually services each request.
    inner: Allocator,
    /// Bytes requested so far: the length of every successful allocation plus
    /// the growth of every successful resize/remap. Shrinks and frees do not
    /// decrease it.
    bytes_allocated: u64 = 0,
    /// Number of successful `alloc` calls.
    allocs: u64 = 0,
    /// Number of `free` calls.
    frees: u64 = 0,

    /// Dispatch table shared by every `Allocator` handle handed out by
    /// `allocator`.
    const counting_vtable: Allocator.VTable = .{
        .alloc = alloc,
        .resize = resize,
        .remap = remap,
        .free = free,
    };

    /// Wrap `inner` with all counters at zero.
    pub fn init(inner: Allocator) CountingAllocator {
        return CountingAllocator{ .inner = inner };
    }

    /// Zero every counter; the backing allocator is left untouched.
    pub fn reset(self: *CountingAllocator) void {
        self.allocs = 0;
        self.frees = 0;
        self.bytes_allocated = 0;
    }

    /// Build an `Allocator` handle whose context pointer is `self`, so `self`
    /// must outlive every use of the returned handle.
    pub fn allocator(self: *CountingAllocator) Allocator {
        return .{ .ptr = self, .vtable = &counting_vtable };
    }

    /// Recover the wrapper from the opaque context stored in the handle.
    fn from_ctx(ctx: *anyopaque) *CountingAllocator {
        return @ptrCast(@alignCast(ctx));
    }

    /// Count the extra bytes when a block grew; shrinking is ignored.
    fn note_growth(self: *CountingAllocator, old_len: usize, new_len: usize) void {
        if (new_len > old_len) self.bytes_allocated += new_len - old_len;
    }

    fn alloc(ctx: *anyopaque, len: usize, alignment: Alignment, ret_addr: usize) ?[*]u8 {
        const counter = from_ctx(ctx);
        const ptr = counter.inner.vtable.alloc(counter.inner.ptr, len, alignment, ret_addr) orelse return null;
        // Only successful allocations are counted.
        counter.allocs += 1;
        counter.bytes_allocated += len;
        return ptr;
    }

    fn resize(ctx: *anyopaque, memory: []u8, alignment: Alignment, new_len: usize, ret_addr: usize) bool {
        const counter = from_ctx(ctx);
        if (!counter.inner.vtable.resize(counter.inner.ptr, memory, alignment, new_len, ret_addr)) return false;
        counter.note_growth(memory.len, new_len);
        return true;
    }

    fn remap(ctx: *anyopaque, memory: []u8, alignment: Alignment, new_len: usize, ret_addr: usize) ?[*]u8 {
        const counter = from_ctx(ctx);
        const ptr = counter.inner.vtable.remap(counter.inner.ptr, memory, alignment, new_len, ret_addr) orelse return null;
        counter.note_growth(memory.len, new_len);
        return ptr;
    }

    fn free(ctx: *anyopaque, memory: []u8, alignment: Alignment, ret_addr: usize) void {
        const counter = from_ctx(ctx);
        counter.inner.vtable.free(counter.inner.ptr, memory, alignment, ret_addr);
        counter.frees += 1;
    }
 };
 test "CountingAllocator counts allocations" {
    const testing = std.testing;
    var counting = CountingAllocator.init(testing.allocator);
    const gpa = counting.allocator();

    const small = try gpa.alloc(u8, 64);
    const large = try gpa.alloc(u8, 128);
    // Two successful allocations totalling 64 + 128 bytes.
    try testing.expectEqual(@as(u64, 2), counting.allocs);
    try testing.expectEqual(@as(u64, 192), counting.bytes_allocated);

    gpa.free(small);
    gpa.free(large);
    // Each release is counted once.
    try testing.expectEqual(@as(u64, 2), counting.frees);
 }
 test "CountingAllocator reset clears counters" {
    const testing = std.testing;
    var counting = CountingAllocator.init(testing.allocator);
    const gpa = counting.allocator();

    const scratch = try gpa.alloc(u8, 32);
    defer gpa.free(scratch);
    try testing.expect(counting.allocs > 0);

    // After a reset every counter must read zero again.
    counting.reset();
    try testing.expectEqual(@as(u64, 0), counting.allocs);
    try testing.expectEqual(@as(u64, 0), counting.bytes_allocated);
    try testing.expectEqual(@as(u64, 0), counting.frees);
 }
--- a/src/benchmark.zig
+++ b/src/benchmark.zig
@@ -0,0 +1,107 @@
 const std = @import("std");
 const Allocator = std.mem.Allocator;
 const Io = std.Io;
 const CountingAllocator = @import("alloc.zig").CountingAllocator;
 /// Signature of a user-written benchmark function: it receives the
 /// `Benchmark` handle for the current attempt and may fail with any error.
 pub const BenchFn = *const fn (b: *Benchmark) anyerror!void;
 /// Callback installed by the Suite so `b.run(name, f)` can start a fresh
 /// adaptive run for a sub-benchmark. Keeps benchmark.zig free of a
 /// back-import on suite.zig.
 ///
 /// `ctx` is the opaque pointer stored in `Benchmark.sub_run_ctx`, `sub_name`
 /// is the name given to `b.run`, and `f` is the sub-benchmark body.
 pub const SubRunFn = *const fn (
    ctx: *anyopaque,
    sub_name: []const u8,
    f: BenchFn,
 ) anyerror!void;
 /// State for one invocation of a user benchmark function. The runner picks
 /// `n` for each attempt; the user reads it inside the hot loop.
 pub const Benchmark = struct {
    /// Target iteration count for the current attempt. Set by the runner.
    n: u64,
    /// Counting-wrapped allocator. Use it inside the bench to get accurate
    /// B/op and allocs/op. Backed by the GPA passed to `Suite.init`.
    allocator: Allocator,
    /// I/O capability — passed through so user benchmarks can do I/O if
    /// they need to. Also used to read the clock.
    io: Io,
    /// Hierarchical name used when reporting sub-benchmark results.
    name: []const u8,
    /// Counters behind `allocator`; zeroed by `reset_timer`.
    counter: *CountingAllocator,
    /// Callback and context used by `run` to launch a sub-benchmark.
    sub_run: SubRunFn,
    sub_run_ctx: *anyopaque,
    /// Time measured so far in this attempt, excluding any interval that is
    /// still running.
    accumulated_ns: i128 = 0,
    /// Whether an interval is currently being timed.
    timer_running: bool = true,
    /// Start of the interval currently being timed.
    start_ts: Io.Timestamp = Io.Timestamp.zero,
    /// Total bytes declared via `set_bytes` for this attempt (per-op * n).
    bytes_processed: u64 = 0,
    /// If the user calls `b.run`, this Benchmark is just a container — the
    /// outer runner will skip reporting its own result.
    is_container: bool = false,
    /// Forces the reporter to print B/op and allocs/op even when zero.
    force_report_allocs: bool = false,

    /// Start (or restart) the timer fresh: zero accumulated time, zero
    /// allocation counters. Call after setup and before the measured loop.
    ///
    /// The byte count declared with `set_bytes` is deliberately left alone,
    /// so calling `set_bytes` during setup and then `reset_timer` still
    /// yields a throughput figure.
    pub fn reset_timer(b: *Benchmark) void {
        b.accumulated_ns = 0;
        b.counter.reset();
        b.timer_running = true;
        b.start_ts = Io.Timestamp.now(b.io, .awake);
    }

    /// Pause timing: the interval since the last start is folded into the
    /// accumulated total. Pair with `start_timer` for per-iteration setup
    /// that should not be timed. No-op when the timer is already stopped.
    ///
    /// Only the clock is paused. Allocations made while the timer is
    /// stopped are still recorded by the counting allocator.
    pub fn stop_timer(b: *Benchmark) void {
        if (!b.timer_running) return;
        const now = Io.Timestamp.now(b.io, .awake);
        b.accumulated_ns += @as(i128, b.start_ts.durationTo(now).nanoseconds);
        b.timer_running = false;
    }

    /// Resume timing after `stop_timer`. No-op when the timer is running.
    pub fn start_timer(b: *Benchmark) void {
        if (b.timer_running) return;
        b.start_ts = Io.Timestamp.now(b.io, .awake);
        b.timer_running = true;
    }

    /// Declare how many bytes a single operation processes. The total for
    /// the attempt (`bytes_per_op * n`, saturating) is stored so throughput
    /// can be derived from it and the measured time.
    pub fn set_bytes(b: *Benchmark, bytes_per_op: u64) void {
        b.bytes_processed = bytes_per_op *| b.n;
    }

    /// Mark this benchmark so allocations columns are always printed,
    /// regardless of the `--allocs` flag.
    pub fn report_allocs(b: *Benchmark) void {
        b.force_report_allocs = true;
    }

    /// Optimization barrier — discourages the compiler from eliminating the
    /// computation that produced `value`. Use to keep a result alive past
    /// the loop body.
    pub fn keep(b: *Benchmark, value: anytype) void {
        _ = b;
        std.mem.doNotOptimizeAway(value);
    }

    /// Run a sub-benchmark. The current benchmark becomes a container and
    /// its own result is not reported; the sub-benchmark is reported as
    /// `parent/sub_name`. The parent's timer is stopped first. Any error
    /// from the sub-run callback is propagated.
    pub fn run(b: *Benchmark, sub_name: []const u8, f: BenchFn) !void {
        b.stop_timer();
        b.is_container = true;
        try b.sub_run(b.sub_run_ctx, sub_name, f);
    }

    /// Internal: finalize accumulated time at end of one attempt. Closing
    /// the running interval is exactly what `stop_timer` does, including the
    /// no-op when the timer is already stopped.
    pub fn finish(b: *Benchmark) void {
        b.stop_timer();
    }
 };
--- a/src/runner.zig
+++ b/src/runner.zig
@@ -0,0 +1,164 @@
 const std = @import("std");
 const bench = @import("benchmark.zig");
 const stats = @import("stats.zig");
 const CountingAllocator = @import("alloc.zig").CountingAllocator;
 const Benchmark = bench.Benchmark;
 const BenchFn = bench.BenchFn;
 const SubRunFn = bench.SubRunFn;
 /// Outcome of one adaptive run of a single benchmark, as returned by
 /// `run_one`. All figures describe the final attempt only.
 pub const Result = struct {
    /// Benchmark name, as passed to `run_one`.
    name: []const u8,
    /// Iteration count of the final attempt.
    n: u64,
    /// Time measured during the final attempt, in nanoseconds.
    elapsed_ns: u64,
    /// `elapsed_ns` divided by `n`.
    ns_per_op: f64,
    /// Bytes recorded by the counting allocator, divided by `n`.
    bytes_per_op: f64,
    /// Allocations recorded by the counting allocator, divided by `n`.
    allocs_per_op: f64,
    /// MB/s if the benchmark called `set_bytes`, null otherwise. Also null
    /// when no time was measured. Computed with a 1024 * 1024 divisor.
    mb_per_sec: ?f64,
    /// Whether the user explicitly asked for allocation columns.
    force_report_allocs: bool,
    /// If true, this benchmark only ran sub-benchmarks and should not be
    /// reported as its own row.
    is_container: bool,
 };
 /// Stopping criteria for the adaptive loop in `run_one`.
 pub const Options = struct {
    /// Stop growing `n` once an attempt has measured at least this many
    /// nanoseconds. Defaults to one second.
    min_time_ns: u64 = std.time.ns_per_s,
    /// Stop growing `n` once it reaches this iteration count.
    max_iters: u64 = 1_000_000_000,
 };
 /// Run `f` repeatedly with a growing iteration count until one attempt
 /// measures at least `opts.min_time_ns`, the count reaches `opts.max_iters`,
 /// or the benchmark turns out to be a container for sub-benchmarks. The
 /// returned `Result` describes the last attempt. Errors from `f` propagate.
 pub fn run_one(
    name: []const u8,
    f: BenchFn,
    counter: *CountingAllocator,
    sub_run: SubRunFn,
    sub_run_ctx: *anyopaque,
    io: std.Io,
    opts: Options,
 ) !Result {
    var iters: u64 = 1;
    var elapsed_ns: u64 = 0;
    var alloc_bytes: u64 = 0;
    var alloc_count: u64 = 0;
    var processed: u64 = 0;
    var container = false;
    var force_allocs = false;

    // The continue expression only runs when another attempt is needed.
    while (true) : (iters = next_n(iters, elapsed_ns, opts.min_time_ns, opts.max_iters)) {
        // Every attempt gets a fresh handle and zeroed counters.
        var attempt: Benchmark = .{
            .n = iters,
            .allocator = counter.allocator(),
            .io = io,
            .name = name,
            .counter = counter,
            .sub_run = sub_run,
            .sub_run_ctx = sub_run_ctx,
        };
        counter.reset();
        attempt.reset_timer();
        try f(&attempt);
        attempt.finish();

        // Clamp the signed 128-bit total into the u64 range before narrowing.
        const non_negative: i128 = if (attempt.accumulated_ns < 0) 0 else attempt.accumulated_ns;
        elapsed_ns = @intCast(@min(non_negative, std.math.maxInt(u64)));
        alloc_bytes = counter.bytes_allocated;
        alloc_count = counter.allocs;
        processed = attempt.bytes_processed;
        container = attempt.is_container;
        force_allocs = attempt.force_report_allocs;

        const done = container or elapsed_ns >= opts.min_time_ns or iters >= opts.max_iters;
        if (done) break;
    }

    const iters_f: f64 = @floatFromInt(iters);
    const elapsed_f: f64 = @floatFromInt(elapsed_ns);
    const has_iters = iters != 0;

    const mb_per_sec: ?f64 = throughput: {
        if (processed == 0 or elapsed_ns == 0) break :throughput null;
        const seconds: f64 = elapsed_f / @as(f64, std.time.ns_per_s);
        const megabytes: f64 = @as(f64, @floatFromInt(processed)) / (1024.0 * 1024.0);
        break :throughput megabytes / seconds;
    };

    return .{
        .name = name,
        .n = iters,
        .elapsed_ns = elapsed_ns,
        .ns_per_op = if (has_iters) elapsed_f / iters_f else 0,
        .bytes_per_op = if (has_iters) @as(f64, @floatFromInt(alloc_bytes)) / iters_f else 0,
        .allocs_per_op = if (has_iters) @as(f64, @floatFromInt(alloc_count)) / iters_f else 0,
        .mb_per_sec = mb_per_sec,
        .force_report_allocs = force_allocs,
        .is_container = container,
    };
 }
 /// Pick the next iteration count. Strategy: predict an `n` that should land
 /// at `min_time_ns` based on the last sample, overshoot by 20 %, grow by at
 /// least one iteration and at most ×100, round up to a "nice" number, and
 /// finally clamp to `max_iters`.
 ///
 /// The clamp is applied after rounding so the result never exceeds
 /// `max_iters`, even when `max_iters` is not itself a "nice" number. All
 /// arithmetic saturates instead of overflowing.
 fn next_n(prev_n: u64, prev_elapsed_ns: u64, min_time_ns: u64, max_iters: u64) u64 {
    const growth_cap = prev_n *| 100;
    var predicted: u64 = if (prev_elapsed_ns == 0)
        // Nothing measurable yet: take the largest allowed step.
        growth_cap
    else blk: {
        // (min_time_ns * 1.2) * prev_n / prev_elapsed_ns, in integer math
        const num = @as(u128, min_time_ns) *| 12 *| prev_n;
        const denom: u128 = @as(u128, prev_elapsed_ns) *| 10;
        break :blk std.math.cast(u64, num / denom) orelse std.math.maxInt(u64);
    };
    // Always make progress, but never by more than ×100 in a single step.
    if (predicted <= prev_n) predicted = prev_n +| 1;
    if (predicted > growth_cap) predicted = growth_cap;
    return @min(round_up(predicted), max_iters);
 }
 /// Round up to a "nice" decimal number (1, 2, 3, 5, 10, 20, 30, 50, 100, ...).
 /// Matches Go testing's growth heuristic — avoids reporting awkward iteration
 /// counts like 1357 in favor of 2000. Values at or below 1 map to 1.
 fn round_up(n: u64) u64 {
    if (n <= 1) return 1;
    // Largest power of ten strictly below `n` (saturating near the u64 limit).
    var base: u64 = 1;
    while (base *| 10 < n) base *|= 10;
    for ([_]u64{ 2, 3, 5 }) |step| {
        const candidate = step *| base;
        if (n <= candidate) return candidate;
    }
    return 10 *| base;
 }
 test "round_up snaps to nice numbers" {
    // Each row pairs an input with the nice number it must snap to.
    const Case = struct { input: u64, expected: u64 };
    const cases = [_]Case{
        .{ .input = 1, .expected = 1 },
        .{ .input = 2, .expected = 2 },
        .{ .input = 3, .expected = 3 },
        .{ .input = 4, .expected = 5 },
        .{ .input = 5, .expected = 5 },
        .{ .input = 7, .expected = 10 },
        .{ .input = 73, .expected = 100 },
        .{ .input = 999, .expected = 1000 },
        .{ .input = 1234, .expected = 2000 },
        .{ .input = 1_000_000, .expected = 1_000_000 },
    };
    for (cases) |case| {
        try std.testing.expectEqual(case.expected, round_up(case.input));
    }
 }
 test "next_n grows toward target" {
    // Nothing measured yet (0 ns): take the maximum ×100 step.
    try std.testing.expectEqual(@as(u64, 100), next_n(1, 0, std.time.ns_per_s, 1 << 30));
    // 100 iters took 1 ms against a 1 s target. The raw prediction is
    // 1.2e9 * 100 / 1e6 = 120_000, but growth is capped at ×100 per step,
    // so the next attempt runs exactly 100 * 100 = 10_000 iterations.
    const n2 = next_n(100, std.time.ns_per_ms, std.time.ns_per_s, 1 << 30);
    try std.testing.expectEqual(@as(u64, 10_000), n2);
 }
--- a/src/stats.zig
+++ b/src/stats.zig
@@ -0,0 +1,57 @@
 const std = @import("std");
 /// Aggregate statistics over a set of samples, as produced by `summarize`.
 /// Every field is zero when there were no samples.
 pub const Summary = struct {
    /// Number of samples summarized.
    n: usize,
    /// Smallest sample.
    min: f64,
    /// Arithmetic mean of the samples.
    mean: f64,
    /// Sample standard deviation (n - 1 denominator); 0 with fewer than two
    /// samples.
    stddev: f64,
 };
 /// Compute the minimum, mean and sample standard deviation of `samples`.
 /// An empty slice yields an all-zero `Summary`; a single sample yields a
 /// standard deviation of 0.
 pub fn summarize(samples: []const f64) Summary {
    const count = samples.len;
    if (count == 0) return .{ .n = 0, .min = 0, .mean = 0, .stddev = 0 };

    // First pass: running total and smallest value.
    var smallest: f64 = samples[0];
    var total: f64 = 0;
    for (samples) |x| {
        if (x < smallest) smallest = x;
        total += x;
    }
    const average = total / @as(f64, @floatFromInt(count));

    // Second pass: sum of squared deviations from the mean.
    var squared_dev: f64 = 0;
    for (samples) |x| {
        const delta = x - average;
        squared_dev += delta * delta;
    }

    // Bessel's correction (n - 1); undefined for one sample, so report 0.
    const variance: f64 = if (count > 1)
        squared_dev / @as(f64, @floatFromInt(count - 1))
    else
        0;

    return .{
        .n = count,
        .min = smallest,
        .mean = average,
        .stddev = @sqrt(variance),
    };
 }
 test "summarize empty" {
    // No samples at all: the summary reports a count of zero.
    const no_samples: []const f64 = &.{};
    const summary = summarize(no_samples);
    try std.testing.expectEqual(@as(usize, 0), summary.n);
 }
 test "summarize single value" {
    const testing = std.testing;
    // One sample is both the minimum and the mean, with no spread.
    const lone = [_]f64{42.0};
    const summary = summarize(&lone);
    try testing.expectEqual(@as(f64, 42), summary.min);
    try testing.expectEqual(@as(f64, 42), summary.mean);
    try testing.expectEqual(@as(f64, 0), summary.stddev);
 }
 test "summarize multiple values" {
    const testing = std.testing;
    const samples = [_]f64{ 2, 4, 4, 4, 5, 5, 7, 9 };
    const summary = summarize(&samples);
    try testing.expectEqual(@as(f64, 2), summary.min);
    try testing.expectEqual(@as(f64, 5), summary.mean);
    // Squared deviations sum to 32 over n - 1 = 7: sqrt(32/7) ≈ 2.138.
    try testing.expectApproxEqAbs(@as(f64, 2.1380899), summary.stddev, 1e-5);
 }