feat: core runner — counting allocator, stats, Benchmark, adaptive N
Adds the measurement core:
- CountingAllocator: a thin wrapper over the std.mem.Allocator vtable that tracks alloc/free counts and total bytes allocated, exposing a reset() used between attempts of the adaptive loop.
- stats.summarize: min / mean / sample stddev over an array of f64, used to aggregate --count repetitions.
- Benchmark: user-facing handle passed to each bench fn — n, allocator (counting-wrapped), io, with snake_case methods reset_timer / stop_timer / start_timer / set_bytes / report_allocs / keep / run (sub-bench). Times via std.Io.Timestamp.now(io, .awake).
- runner.run_one: galloping iteration count toward --min-time, with a ×100 growth cap per step and "nice-number" rounding, matching Go's testing pkg heuristic.
This commit is contained in:
94
src/alloc.zig
Normal file
94
src/alloc.zig
Normal file
@@ -0,0 +1,94 @@
|
||||
const std = @import("std");
|
||||
const Allocator = std.mem.Allocator;
|
||||
const Alignment = std.mem.Alignment;
|
||||
|
||||
/// Allocator wrapper that forwards every request to `inner` and keeps running
/// totals of what passed through it.
///
/// The counters only grow until `reset` is called:
/// - `allocs`: number of successful `alloc` calls.
/// - `frees`: number of `free` calls.
/// - `bytes_allocated`: bytes handed out by successful `alloc` calls plus the
///   growth of successful `resize` / `remap` calls. Shrinking and freeing
///   never subtract from it.
///
/// The `Allocator` returned by `allocator()` stores a pointer to this struct,
/// so the struct must stay at a stable address while that handle is in use.
pub const CountingAllocator = struct {
    inner: Allocator,
    bytes_allocated: u64 = 0,
    allocs: u64 = 0,
    frees: u64 = 0,

    /// Dispatch table shared by every handle produced by `allocator()`.
    const vtable: Allocator.VTable = .{
        .alloc = on_alloc,
        .resize = on_resize,
        .remap = on_remap,
        .free = on_free,
    };

    /// Wrap `inner` with all counters at zero.
    pub fn init(inner: Allocator) CountingAllocator {
        return CountingAllocator{ .inner = inner };
    }

    /// Zero every counter while keeping the wrapped allocator.
    pub fn reset(self: *CountingAllocator) void {
        // Copy the handle out first so the struct can be rebuilt in place.
        const wrapped = self.inner;
        self.* = .{ .inner = wrapped };
    }

    /// Return an `Allocator` handle whose calls are counted by `self`.
    pub fn allocator(self: *CountingAllocator) Allocator {
        return .{ .ptr = self, .vtable = &vtable };
    }

    /// Recover the typed receiver from the opaque vtable context pointer.
    fn from_ctx(ctx: *anyopaque) *CountingAllocator {
        return @ptrCast(@alignCast(ctx));
    }

    /// Forward an allocation; count it only when the inner allocator succeeds.
    fn on_alloc(ctx: *anyopaque, len: usize, alignment: Alignment, ret_addr: usize) ?[*]u8 {
        const self = from_ctx(ctx);
        const ptr = self.inner.vtable.alloc(self.inner.ptr, len, alignment, ret_addr) orelse return null;
        self.allocs += 1;
        self.bytes_allocated += len;
        return ptr;
    }

    /// Forward an in-place resize; a successful grow adds the extra bytes.
    fn on_resize(ctx: *anyopaque, memory: []u8, alignment: Alignment, new_len: usize, ret_addr: usize) bool {
        const self = from_ctx(ctx);
        if (!self.inner.vtable.resize(self.inner.ptr, memory, alignment, new_len, ret_addr)) return false;
        // Saturating subtraction yields 0 for shrinks, so only growth is billed.
        self.bytes_allocated += new_len -| memory.len;
        return true;
    }

    /// Forward a remap; a successful grow adds the extra bytes.
    fn on_remap(ctx: *anyopaque, memory: []u8, alignment: Alignment, new_len: usize, ret_addr: usize) ?[*]u8 {
        const self = from_ctx(ctx);
        const ptr = self.inner.vtable.remap(self.inner.ptr, memory, alignment, new_len, ret_addr) orelse return null;
        self.bytes_allocated += new_len -| memory.len;
        return ptr;
    }

    /// Forward a free and count it.
    fn on_free(ctx: *anyopaque, memory: []u8, alignment: Alignment, ret_addr: usize) void {
        const self = from_ctx(ctx);
        self.inner.vtable.free(self.inner.ptr, memory, alignment, ret_addr);
        self.frees += 1;
    }
};
|
||||
|
||||
test "CountingAllocator counts allocations" {
    const expectEqual = std.testing.expectEqual;

    var counting = CountingAllocator.init(std.testing.allocator);
    const gpa = counting.allocator();

    // Two successful allocations: 64 + 128 = 192 bytes in total.
    const small = try gpa.alloc(u8, 64);
    const large = try gpa.alloc(u8, 128);
    try expectEqual(@as(u64, 2), counting.allocs);
    try expectEqual(@as(u64, 192), counting.bytes_allocated);

    // Each release is tallied separately from the allocation count.
    gpa.free(small);
    gpa.free(large);
    try expectEqual(@as(u64, 2), counting.frees);
}
|
||||
|
||||
test "CountingAllocator reset clears counters" {
    const expectEqual = std.testing.expectEqual;

    var counting = CountingAllocator.init(std.testing.allocator);
    const gpa = counting.allocator();

    // Make the counters non-zero so the reset has something to clear.
    const scratch = try gpa.alloc(u8, 32);
    defer gpa.free(scratch);
    try std.testing.expect(counting.allocs > 0);

    counting.reset();

    try expectEqual(@as(u64, 0), counting.allocs);
    try expectEqual(@as(u64, 0), counting.bytes_allocated);
    try expectEqual(@as(u64, 0), counting.frees);
}
|
||||
107
src/benchmark.zig
Normal file
107
src/benchmark.zig
Normal file
@@ -0,0 +1,107 @@
|
||||
const std = @import("std");
|
||||
const Allocator = std.mem.Allocator;
|
||||
const Io = std.Io;
|
||||
const CountingAllocator = @import("alloc.zig").CountingAllocator;
|
||||
|
||||
/// Signature of a user benchmark: it receives the per-attempt `Benchmark`
/// handle and may fail with any error.
pub const BenchFn = *const fn (b: *Benchmark) anyerror!void;

/// Hook that `Benchmark.run` calls to launch a sub-benchmark named `sub_name`
/// running `f`. `ctx` is the opaque pointer stored in `Benchmark.sub_run_ctx`.
/// Going through a function pointer keeps benchmark.zig free of a back-import
/// on suite.zig; the Suite is expected to install the hook so each
/// sub-benchmark gets its own fresh adaptive run.
pub const SubRunFn = *const fn (ctx: *anyopaque, sub_name: []const u8, f: BenchFn) anyerror!void;
|
||||
|
||||
/// State for one invocation of a user benchmark function. The runner builds
/// one per attempt with the iteration count `n`; the user reads `n` inside
/// the hot loop.
pub const Benchmark = struct {
    /// Target iteration count for the current attempt. Set by the runner.
    n: u64,
    /// Counting-wrapped allocator. Use it inside the bench to get accurate
    /// B/op and allocs/op.
    allocator: Allocator,
    /// I/O capability — used for timestamps and passed through so user
    /// benchmarks can do I/O if they need to.
    io: Io,
    /// Hierarchical name used when reporting sub-benchmark results.
    name: []const u8,

    /// Counter behind `allocator`; zeroed by `reset_timer`.
    counter: *CountingAllocator,
    /// Hook and context used by `run` to launch sub-benchmarks.
    sub_run: SubRunFn,
    sub_run_ctx: *anyopaque,

    /// Measured time so far, excluding the currently running interval.
    accumulated_ns: i128 = 0,
    timer_running: bool = true,
    /// Start of the currently running interval (valid while `timer_running`).
    start_ts: Io.Timestamp = Io.Timestamp.zero,
    /// Total bytes processed in this attempt, as recorded by `set_bytes`.
    bytes_processed: u64 = 0,
    /// If the user calls `b.run`, this Benchmark is just a container — the
    /// outer runner will skip reporting its own result.
    is_container: bool = false,
    /// Forces the reporter to print B/op and allocs/op even when zero.
    force_report_allocs: bool = false,

    /// Counter values captured when the timer was last stopped. They are
    /// written back when timing resumes or the attempt finishes, so that
    /// allocations made while paused are not attributed to the benchmark.
    paused_bytes_allocated: u64 = 0,
    paused_allocs: u64 = 0,
    paused_frees: u64 = 0,

    /// Start (or restart) the timer fresh: zero accumulated time, zero
    /// allocation counters. Call after setup and before the measured loop.
    ///
    /// The byte count recorded by `set_bytes` is deliberately kept, so the
    /// usual `b.set_bytes(len); b.reset_timer();` sequence still reports
    /// throughput.
    pub fn reset_timer(b: *Benchmark) void {
        b.accumulated_ns = 0;
        b.counter.reset();
        b.timer_running = true;
        b.start_ts = Io.Timestamp.now(b.io, .awake);
    }

    /// Pause timing and allocation counting. Pair with `start_timer` for
    /// per-iteration setup that should not be measured. No-op when the timer
    /// is already stopped.
    pub fn stop_timer(b: *Benchmark) void {
        if (!b.timer_running) return;
        b.pause_measurement();
    }

    /// Resume timing after `stop_timer`. Allocations made while the timer
    /// was stopped are discarded from the counters. No-op when the timer is
    /// already running.
    pub fn start_timer(b: *Benchmark) void {
        if (b.timer_running) return;
        b.restore_counts();
        b.start_ts = Io.Timestamp.now(b.io, .awake);
        b.timer_running = true;
    }

    /// Record the bytes processed per iteration. The stored total is
    /// `bytes_per_op * n` (saturating); the runner divides it by the elapsed
    /// time to obtain throughput.
    pub fn set_bytes(b: *Benchmark, bytes_per_op: u64) void {
        b.bytes_processed = bytes_per_op *| b.n;
    }

    /// Mark this benchmark so allocation columns are always requested; the
    /// runner copies the flag into `Result.force_report_allocs`.
    pub fn report_allocs(b: *Benchmark) void {
        b.force_report_allocs = true;
    }

    /// Optimization barrier — discourages the compiler from eliminating the
    /// computation that produced `value`. Use to keep a result alive past
    /// the loop body.
    pub fn keep(b: *Benchmark, value: anytype) void {
        _ = b;
        std.mem.doNotOptimizeAway(value);
    }

    /// Run a sub-benchmark. The current benchmark stops its own timer,
    /// becomes a container whose own result is not reported, and delegates
    /// to the installed `sub_run` hook. Errors from the hook are propagated.
    pub fn run(b: *Benchmark, sub_name: []const u8, f: BenchFn) !void {
        b.stop_timer();
        b.is_container = true;
        try b.sub_run(b.sub_run_ctx, sub_name, f);
    }

    /// Internal: finalize accumulated time at end of one attempt. If the
    /// timer was already stopped, allocations made since then are dropped
    /// from the counters instead. Safe to call more than once.
    pub fn finish(b: *Benchmark) void {
        if (b.timer_running) {
            b.pause_measurement();
        } else {
            b.restore_counts();
        }
    }

    /// Close the running interval: add it to `accumulated_ns`, mark the
    /// timer stopped and remember the allocation counters at this instant.
    fn pause_measurement(b: *Benchmark) void {
        const now = Io.Timestamp.now(b.io, .awake);
        b.accumulated_ns += @as(i128, b.start_ts.durationTo(now).nanoseconds);
        b.timer_running = false;
        b.paused_bytes_allocated = b.counter.bytes_allocated;
        b.paused_allocs = b.counter.allocs;
        b.paused_frees = b.counter.frees;
    }

    /// Write the counters captured by `pause_measurement` back into the
    /// counting allocator, erasing whatever happened while paused.
    fn restore_counts(b: *Benchmark) void {
        b.counter.bytes_allocated = b.paused_bytes_allocated;
        b.counter.allocs = b.paused_allocs;
        b.counter.frees = b.paused_frees;
    }
};
|
||||
164
src/runner.zig
Normal file
164
src/runner.zig
Normal file
@@ -0,0 +1,164 @@
|
||||
const std = @import("std");
|
||||
const bench = @import("benchmark.zig");
|
||||
const stats = @import("stats.zig");
|
||||
const CountingAllocator = @import("alloc.zig").CountingAllocator;
|
||||
|
||||
const Benchmark = bench.Benchmark;
|
||||
const BenchFn = bench.BenchFn;
|
||||
const SubRunFn = bench.SubRunFn;
|
||||
|
||||
/// Outcome of one adaptive run, as produced by `run_one`.
pub const Result = struct {
    /// Benchmark name as passed to `run_one`.
    name: []const u8,
    /// Iteration count of the final attempt.
    n: u64,
    /// Measured time of the final attempt, in nanoseconds.
    elapsed_ns: u64,
    /// `elapsed_ns` divided by `n`.
    ns_per_op: f64,
    /// Bytes counted by the counting allocator, divided by `n`.
    bytes_per_op: f64,
    /// Allocations counted by the counting allocator, divided by `n`.
    allocs_per_op: f64,
    /// Throughput if the benchmark called `set_bytes`, null otherwise.
    mb_per_sec: ?f64,
    /// Whether the user explicitly asked for allocation columns.
    force_report_allocs: bool,
    /// If true, this benchmark only ran sub-benchmarks and should not be
    /// reported as its own row.
    is_container: bool,
};
|
||||
|
||||
/// Stop conditions for the adaptive loop in `run_one`.
pub const Options = struct {
    /// Keep growing the iteration count until one attempt takes at least
    /// this long. Defaults to one second.
    min_time_ns: u64 = std.time.ns_per_s,
    /// Stop growing once the iteration count reaches this value.
    max_iters: u64 = 1_000_000_000,
};
|
||||
|
||||
/// Adaptive single-run: repeat `f` with a growing iteration count until one
/// attempt lasts at least `opts.min_time_ns`, the count reaches
/// `opts.max_iters`, or the benchmark turns out to be a container of
/// sub-benchmarks. Returns the `Result` of the last attempt.
///
/// Each attempt gets a fresh `Benchmark` wired to `counter`, `sub_run`,
/// `sub_run_ctx` and `io`. Any error returned by `f` aborts the run and is
/// propagated to the caller.
pub fn run_one(
    name: []const u8,
    f: BenchFn,
    counter: *CountingAllocator,
    sub_run: SubRunFn,
    sub_run_ctx: *anyopaque,
    io: std.Io,
    opts: Options,
) !Result {
    // Measurements of the most recent attempt; overwritten on every pass.
    var elapsed_ns: u64 = 0;
    var alloc_bytes: u64 = 0;
    var alloc_count: u64 = 0;
    var bytes_processed: u64 = 0;
    var is_container = false;
    var force_report = false;

    var n: u64 = 1;
    // The continue-expression only runs when no stop condition fired, so on
    // exit `n` is still the count used by the final attempt.
    while (true) : (n = next_n(n, elapsed_ns, opts.min_time_ns, opts.max_iters)) {
        var attempt = Benchmark{
            .n = n,
            .allocator = counter.allocator(),
            .io = io,
            .name = name,
            .counter = counter,
            .sub_run = sub_run,
            .sub_run_ctx = sub_run_ctx,
        };
        counter.reset();
        attempt.reset_timer();

        try f(&attempt);

        attempt.finish();

        // Clamp the signed duration into the u64 range before narrowing.
        const non_negative: i128 = @max(attempt.accumulated_ns, 0);
        elapsed_ns = @intCast(@min(non_negative, std.math.maxInt(u64)));
        alloc_bytes = counter.bytes_allocated;
        alloc_count = counter.allocs;
        bytes_processed = attempt.bytes_processed;
        is_container = attempt.is_container;
        force_report = attempt.force_report_allocs;

        const done = is_container or
            elapsed_ns >= opts.min_time_ns or
            n >= opts.max_iters;
        if (done) break;
    }

    // Per-operation figures; an iteration count of zero reports zeros.
    const iters: f64 = @floatFromInt(n);
    const has_iters = n != 0;
    const ns_per_op: f64 = if (has_iters) @as(f64, @floatFromInt(elapsed_ns)) / iters else 0;
    const bytes_per_op: f64 = if (has_iters) @as(f64, @floatFromInt(alloc_bytes)) / iters else 0;
    const allocs_per_op: f64 = if (has_iters) @as(f64, @floatFromInt(alloc_count)) / iters else 0;

    // Throughput is only defined when bytes were recorded and time elapsed.
    // Note the divisor is 1024 * 1024 bytes per reported unit.
    var mb_per_sec: ?f64 = null;
    if (bytes_processed != 0 and elapsed_ns != 0) {
        const megabytes = @as(f64, @floatFromInt(bytes_processed)) / (1024.0 * 1024.0);
        const seconds = @as(f64, @floatFromInt(elapsed_ns)) / @as(f64, std.time.ns_per_s);
        mb_per_sec = megabytes / seconds;
    }

    return Result{
        .name = name,
        .n = n,
        .elapsed_ns = elapsed_ns,
        .ns_per_op = ns_per_op,
        .bytes_per_op = bytes_per_op,
        .allocs_per_op = allocs_per_op,
        .mb_per_sec = mb_per_sec,
        .force_report_allocs = force_report,
        .is_container = is_container,
    };
}
|
||||
|
||||
/// Pick the next iteration count. Strategy: predict an `n` that should land
/// at `min_time_ns` based on the last sample, overshoot by 20 %, force at
/// least `prev_n + 1`, clamp to at most ×100 growth and at most `max_iters`,
/// then round up to a "nice" number.
///
/// The rounding can push the value past the cap (1500 rounds to 2000), so
/// the cap is applied once more afterwards: the result never exceeds
/// `max_iters`, and is never 0. All arithmetic saturates, so huge inputs
/// cannot overflow.
///
/// When the previous attempt measured 0 ns there is nothing to extrapolate
/// from, and the count simply grows by the full ×100 step.
fn next_n(prev_n: u64, prev_elapsed_ns: u64, min_time_ns: u64, max_iters: u64) u64 {
    const growth_cap = prev_n *| 100;

    const predicted: u64 = if (prev_elapsed_ns == 0)
        growth_cap
    else blk: {
        // (min_time_ns * 1.2) * prev_n / prev_elapsed_ns, in integer math
        const num = @as(u128, min_time_ns) *| 12 *| prev_n;
        const denom = @as(u128, prev_elapsed_ns) *| 10;
        break :blk std.math.cast(u64, num / denom) orelse std.math.maxInt(u64);
    };

    // Always make progress, but never by more than the growth cap or past
    // the caller's iteration limit.
    const grown = @max(predicted, prev_n +| 1);
    const capped = @min(grown, growth_cap, max_iters);

    // Re-apply the limit after rounding; keep at least one iteration even
    // if the caller passed a limit of 0.
    return @min(round_up(capped), @max(max_iters, 1));
}

/// Round up to a "nice" decimal number (1, 2, 3, 5, 10, 20, 30, 50, 100, ...).
/// Matches Go testing's growth heuristic — avoids reporting awkward iteration
/// counts like 1357 in favor of 2000. Values of 0 and 1 both map to 1;
/// results saturate at the largest `u64`.
fn round_up(n: u64) u64 {
    if (n <= 1) return 1;

    // Largest power of ten whose tenfold is still not below `n`.
    var base: u64 = 1;
    while (base *| 10 < n) base *|= 10;

    for ([_]u64{ 1, 2, 3, 5 }) |step| {
        const candidate = step *| base;
        if (n <= candidate) return candidate;
    }
    return 10 *| base;
}
|
||||
|
||||
test "round_up snaps to nice numbers" {
    // Each input must land on the next value of the 1-2-3-5-10 ladder.
    const cases = [_]struct { input: u64, want: u64 }{
        .{ .input = 1, .want = 1 },
        .{ .input = 2, .want = 2 },
        .{ .input = 3, .want = 3 },
        .{ .input = 4, .want = 5 },
        .{ .input = 5, .want = 5 },
        .{ .input = 7, .want = 10 },
        .{ .input = 73, .want = 100 },
        .{ .input = 999, .want = 1000 },
        .{ .input = 1234, .want = 2000 },
        .{ .input = 1_000_000, .want = 1_000_000 },
    };
    for (cases) |case| {
        try std.testing.expectEqual(case.want, round_up(case.input));
    }
}
|
||||
|
||||
test "next_n grows toward target" {
    const target_ns = std.time.ns_per_s;

    // Nothing measurable yet (0 ns): take the full x100 step.
    try std.testing.expectEqual(@as(u64, 100), next_n(1, 0, target_ns, 1 << 30));

    // 100 iterations in 1 ms with a 1 s target predicts
    // 1.2e9 * 100 / 1e6 = 120_000, which the x100 growth cap limits to 10_000.
    const grown = next_n(100, std.time.ns_per_ms, target_ns, 1 << 30);
    try std.testing.expect(grown > 100);
    try std.testing.expect(grown <= 100 * 100);
}
|
||||
57
src/stats.zig
Normal file
57
src/stats.zig
Normal file
@@ -0,0 +1,57 @@
|
||||
const std = @import("std");
|
||||
|
||||
/// Aggregate statistics over a set of samples, as computed by `summarize`.
pub const Summary = struct {
    /// Number of samples.
    n: usize,
    /// Smallest sample.
    min: f64,
    /// Arithmetic mean.
    mean: f64,
    /// Sample standard deviation (divides by `n - 1`); 0 for a single sample.
    stddev: f64,
};

/// Compute count, minimum, mean and sample standard deviation of `samples`.
/// An empty slice yields a `Summary` with every field set to zero.
pub fn summarize(samples: []const f64) Summary {
    const count = samples.len;
    if (count == 0) return .{ .n = 0, .min = 0, .mean = 0, .stddev = 0 };

    // First pass: running total and smallest value.
    var lowest = samples[0];
    var total: f64 = 0;
    for (samples) |x| {
        if (x < lowest) lowest = x;
        total += x;
    }
    const average = total / @as(f64, @floatFromInt(count));

    // Second pass: sum of squared deviations from the mean.
    var squared_dev: f64 = 0;
    for (samples) |x| {
        const delta = x - average;
        squared_dev += delta * delta;
    }

    // With a single sample there are no degrees of freedom left.
    const variance: f64 = switch (count) {
        1 => 0,
        else => squared_dev / @as(f64, @floatFromInt(count - 1)),
    };

    return Summary{
        .n = count,
        .min = lowest,
        .mean = average,
        .stddev = @sqrt(variance),
    };
}
|
||||
|
||||
test "summarize empty" {
    // No samples: the summary reports a count of zero.
    const no_samples = [_]f64{};
    const summary = summarize(&no_samples);
    try std.testing.expectEqual(@as(usize, 0), summary.n);
}
|
||||
|
||||
test "summarize single value" {
    // One sample is its own minimum and mean, with no spread.
    const only = [_]f64{42.0};
    const summary = summarize(&only);
    try std.testing.expectEqual(@as(f64, 42), summary.min);
    try std.testing.expectEqual(@as(f64, 42), summary.mean);
    try std.testing.expectEqual(@as(f64, 0), summary.stddev);
}
|
||||
|
||||
test "summarize multiple values" {
    const data = [_]f64{ 2, 4, 4, 4, 5, 5, 7, 9 };
    const summary = summarize(&data);
    try std.testing.expectEqual(@as(f64, 2), summary.min);
    try std.testing.expectEqual(@as(f64, 5), summary.mean);
    // Squared deviations sum to 32 over 7 degrees of freedom:
    // sqrt(32 / 7) ≈ 2.138.
    try std.testing.expectApproxEqAbs(@as(f64, 2.1380899), summary.stddev, 1e-5);
}
|
||||
Reference in New Issue
Block a user