diff --git a/src/alloc.zig b/src/alloc.zig
new file mode 100644
index 0000000..5038aac
--- /dev/null
+++ b/src/alloc.zig
@@ -0,0 +1,110 @@
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+const Alignment = std.mem.Alignment;
+
+/// Allocator wrapper that forwards every request to `inner` and keeps
+/// running totals of successful allocations, bytes requested and frees.
+/// The counters are plain fields updated without any synchronization.
+pub const CountingAllocator = struct {
+    /// Allocator that actually services the requests.
+    inner: Allocator,
+    /// Bytes requested through `alloc` plus growth from `resize`/`remap`.
+    /// Never decremented by `free` or by shrinking.
+    bytes_allocated: u64 = 0,
+    /// Number of successful `alloc` calls.
+    allocs: u64 = 0,
+    /// Number of `free` calls.
+    frees: u64 = 0,
+
+    /// Wrap `inner` with all counters at zero.
+    pub fn init(inner: Allocator) CountingAllocator {
+        return .{ .inner = inner };
+    }
+
+    /// Zero every counter; `inner` is left untouched.
+    pub fn reset(self: *CountingAllocator) void {
+        self.bytes_allocated = 0;
+        self.allocs = 0;
+        self.frees = 0;
+    }
+
+    /// Build the `Allocator` interface backed by this counter. `self` is
+    /// stored as the context pointer, so it must outlive the returned value.
+    pub fn allocator(self: *CountingAllocator) Allocator {
+        return .{
+            .ptr = self,
+            .vtable = &.{
+                .alloc = alloc,
+                .resize = resize,
+                .remap = remap,
+                .free = free,
+            },
+        };
+    }
+
+    /// Forward an allocation; count it only when the inner allocator succeeds.
+    fn alloc(ctx: *anyopaque, len: usize, alignment: Alignment, ret_addr: usize) ?[*]u8 {
+        const self: *CountingAllocator = @ptrCast(@alignCast(ctx));
+        const result = self.inner.vtable.alloc(self.inner.ptr, len, alignment, ret_addr);
+        if (result) |_| {
+            self.allocs += 1;
+            self.bytes_allocated += len;
+        }
+        return result;
+    }
+
+    /// Forward an in-place resize; only successful growth adds to the byte total.
+    fn resize(ctx: *anyopaque, memory: []u8, alignment: Alignment, new_len: usize, ret_addr: usize) bool {
+        const self: *CountingAllocator = @ptrCast(@alignCast(ctx));
+        const ok = self.inner.vtable.resize(self.inner.ptr, memory, alignment, new_len, ret_addr);
+        if (ok and new_len > memory.len) {
+            self.bytes_allocated += new_len - memory.len;
+        }
+        return ok;
+    }
+
+    /// Forward a remap; only successful growth adds to the byte total.
+    fn remap(ctx: *anyopaque, memory: []u8, alignment: Alignment, new_len: usize, ret_addr: usize) ?[*]u8 {
+        const self: *CountingAllocator = @ptrCast(@alignCast(ctx));
+        const result = self.inner.vtable.remap(self.inner.ptr, memory, alignment, new_len, ret_addr);
+        if (result) |_| {
+            if (new_len > memory.len) self.bytes_allocated += new_len - memory.len;
+        }
+        return result;
+    }
+
+    /// Forward a free to the inner allocator and count it.
+    fn free(ctx: *anyopaque, memory: []u8, alignment: Alignment, ret_addr: usize) void {
+        const self: *CountingAllocator = @ptrCast(@alignCast(ctx));
+        self.inner.vtable.free(self.inner.ptr, memory, alignment, ret_addr);
+        self.frees += 1;
+    }
+};
+
+test "CountingAllocator counts allocations" {
+    var counter = CountingAllocator.init(std.testing.allocator);
+    const a = counter.allocator();
+
+    const buf1 = try a.alloc(u8, 64);
+    const buf2 = try a.alloc(u8, 128);
+    try std.testing.expectEqual(@as(u64, 2), counter.allocs);
+    try std.testing.expectEqual(@as(u64, 192), counter.bytes_allocated);
+
+    a.free(buf1);
+    a.free(buf2);
+    try std.testing.expectEqual(@as(u64, 2), counter.frees);
+}
+
+test "CountingAllocator reset clears counters" {
+    var counter = CountingAllocator.init(std.testing.allocator);
+    const a = counter.allocator();
+
+    const buf = try a.alloc(u8, 32);
+    defer a.free(buf);
+    try std.testing.expect(counter.allocs > 0);
+
+    counter.reset();
+    try std.testing.expectEqual(@as(u64, 0), counter.allocs);
+    try std.testing.expectEqual(@as(u64, 0), counter.bytes_allocated);
+    try std.testing.expectEqual(@as(u64, 0), counter.frees);
+}
diff --git a/src/benchmark.zig b/src/benchmark.zig
new file mode 100644
index 0000000..d9747b6
--- /dev/null
+++ b/src/benchmark.zig
@@ -0,0 +1,114 @@
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+const Io = std.Io;
+const CountingAllocator = @import("alloc.zig").CountingAllocator;
+
+/// Signature of a user benchmark: receives the per-attempt state and may fail.
+pub const BenchFn = *const fn (b: *Benchmark) anyerror!void;
+
+/// Callback installed by the Suite so `b.run(name, f)` can start a fresh
+/// adaptive run. Keeps benchmark.zig free of a back-import on suite.zig.
+pub const SubRunFn = *const fn (
+    ctx: *anyopaque,
+    sub_name: []const u8,
+    f: BenchFn,
+) anyerror!void;
+
+/// State for one invocation of a user benchmark function. The runner mutates
+/// `n` between attempts; the user reads it inside the hot loop.
+pub const Benchmark = struct {
+    /// Target iteration count for the current attempt. Set by the runner.
+    n: u64,
+    /// Counting-wrapped allocator. Use it inside the bench to get accurate
+    /// B/op and allocs/op. Backed by the GPA passed to `Suite.init`.
+    allocator: Allocator,
+    /// I/O capability — passed through so user benchmarks can do I/O if
+    /// they need to.
+    io: Io,
+    /// Hierarchical name used when reporting sub-benchmark results.
+    name: []const u8,
+
+    /// Allocation counter that `reset_timer` zeroes.
+    counter: *CountingAllocator,
+    /// Callback and opaque context used by `run` to launch sub-benchmarks.
+    sub_run: SubRunFn,
+    sub_run_ctx: *anyopaque,
+
+    /// Nanoseconds measured so far, excluding any span that is still open.
+    accumulated_ns: i128 = 0,
+    /// True while a measured span that began at `start_ts` is open.
+    timer_running: bool = true,
+    start_ts: Io.Timestamp = Io.Timestamp.zero,
+    /// Total bytes for the whole attempt, as recorded by `set_bytes`.
+    bytes_processed: u64 = 0,
+    /// If the user calls `b.run`, this Benchmark is just a container — the
+    /// outer runner will skip reporting its own result.
+    is_container: bool = false,
+    /// Forces the reporter to print B/op and allocs/op even when zero.
+    force_report_allocs: bool = false,
+
+    /// Start (or restart) the timer fresh: zero accumulated time, zero
+    /// allocation counters and zero recorded bytes. Call after setup and
+    /// before the measured loop (and before `set_bytes`).
+    pub fn reset_timer(b: *Benchmark) void {
+        b.accumulated_ns = 0;
+        b.counter.reset();
+        b.bytes_processed = 0;
+        b.timer_running = true;
+        b.start_ts = Io.Timestamp.now(b.io, .awake);
+    }
+
+    /// Pause timing. Pair with `start_timer` for per-iteration setup that
+    /// should not be timed. Only the clock is paused: this function does
+    /// not touch `counter`, so allocation counting is not suspended.
+    /// No-op if the timer is already stopped.
+    pub fn stop_timer(b: *Benchmark) void {
+        if (!b.timer_running) return;
+        const now = Io.Timestamp.now(b.io, .awake);
+        b.accumulated_ns += @as(i128, b.start_ts.durationTo(now).nanoseconds);
+        b.timer_running = false;
+    }
+
+    /// Resume timing after `stop_timer`. No-op if the timer is running.
+    pub fn start_timer(b: *Benchmark) void {
+        if (b.timer_running) return;
+        b.start_ts = Io.Timestamp.now(b.io, .awake);
+        b.timer_running = true;
+    }
+
+    /// Record the bytes processed by one iteration. The total for the
+    /// attempt (`bytes_per_op * n`, saturating) is stored so the runner can
+    /// derive throughput from it and the elapsed time.
+    pub fn set_bytes(b: *Benchmark, bytes_per_op: u64) void {
+        b.bytes_processed = bytes_per_op *| b.n;
+    }
+
+    /// Mark this benchmark so allocations columns are always printed,
+    /// regardless of the `--allocs` flag.
+    pub fn report_allocs(b: *Benchmark) void {
+        b.force_report_allocs = true;
+    }
+
+    /// Optimization barrier — discourages the compiler from eliminating the
+    /// computation that produced `value`. Use to keep a result alive past
+    /// the loop body.
+    pub fn keep(b: *Benchmark, value: anytype) void {
+        _ = b;
+        std.mem.doNotOptimizeAway(value);
+    }
+
+    /// Run a sub-benchmark. The current benchmark becomes a container and
+    /// its own result is not reported; the sub-benchmark is reported as
+    /// `parent/sub_name`.
+    pub fn run(b: *Benchmark, sub_name: []const u8, f: BenchFn) !void {
+        b.stop_timer();
+        b.is_container = true;
+        try b.sub_run(b.sub_run_ctx, sub_name, f);
+    }
+
+    /// Internal: finalize accumulated time at end of one attempt. Closes
+    /// the open span, if any; same bookkeeping as `stop_timer`.
+    pub fn finish(b: *Benchmark) void {
+        b.stop_timer();
+    }
+};
diff --git a/src/runner.zig b/src/runner.zig
new file mode 100644
index 0000000..0123d54
--- /dev/null
+++ b/src/runner.zig
@@ -0,0 +1,189 @@
+const std = @import("std");
+const bench = @import("benchmark.zig");
+const stats = @import("stats.zig");
+const CountingAllocator = @import("alloc.zig").CountingAllocator;
+
+const Benchmark = bench.Benchmark;
+const BenchFn = bench.BenchFn;
+const SubRunFn = bench.SubRunFn;
+
+/// Outcome of one adaptive run, as produced by `run_one`.
+pub const Result = struct {
+    /// Benchmark name, as passed to `run_one`.
+    name: []const u8,
+    /// Iteration count of the final attempt.
+    n: u64,
+    /// Measured time of the final attempt, in nanoseconds.
+    elapsed_ns: u64,
+    /// `elapsed_ns / n`.
+    ns_per_op: f64,
+    /// Bytes counted by the allocator during the final attempt, per iteration.
+    bytes_per_op: f64,
+    /// Allocations counted during the final attempt, per iteration.
+    allocs_per_op: f64,
+    /// Throughput in MB/s (1 MB = 1024 * 1024 bytes). Null when no bytes
+    /// were recorded via `set_bytes` or the measured time is zero.
+    mb_per_sec: ?f64,
+    /// Whether the user explicitly asked for allocation columns.
+    force_report_allocs: bool,
+    /// If true, this benchmark only ran sub-benchmarks and should not be
+    /// reported as its own row.
+    is_container: bool,
+};
+
+/// Stop conditions for the adaptive loop in `run_one`.
+pub const Options = struct {
+    /// An attempt that runs at least this long ends the loop.
+    min_time_ns: u64 = std.time.ns_per_s,
+    /// Upper bound on the iteration count the loop will grow to.
+    max_iters: u64 = 1_000_000_000,
+};
+
+/// Adaptive single-run: grow `n` until `elapsed >= min_time_ns` or
+/// `n >= max_iters`. Returns the final `Result`.
+///
+/// The loop also stops after the first attempt if `f` called `b.run`, since
+/// the benchmark is then only a container. Any error returned by `f` is
+/// propagated immediately.
+pub fn run_one(
+    name: []const u8,
+    f: BenchFn,
+    counter: *CountingAllocator,
+    sub_run: SubRunFn,
+    sub_run_ctx: *anyopaque,
+    io: std.Io,
+    opts: Options,
+) !Result {
+    var n: u64 = 1;
+    var last_elapsed_ns: u64 = 0;
+    var last_alloc_bytes: u64 = 0;
+    var last_alloc_count: u64 = 0;
+    var last_bytes_processed: u64 = 0;
+    var last_is_container: bool = false;
+    var last_force_report: bool = false;
+
+    while (true) {
+        var b: Benchmark = .{
+            .n = n,
+            .allocator = counter.allocator(),
+            .io = io,
+            .name = name,
+            .counter = counter,
+            .sub_run = sub_run,
+            .sub_run_ctx = sub_run_ctx,
+        };
+        // Zeroes the allocation counters too, so no separate reset is needed.
+        b.reset_timer();
+
+        try f(&b);
+
+        b.finish();
+
+        // Clamp into u64: negative totals become 0, huge ones saturate.
+        const elapsed_i: i128 = if (b.accumulated_ns < 0) 0 else b.accumulated_ns;
+        last_elapsed_ns = @intCast(@min(elapsed_i, std.math.maxInt(u64)));
+        last_alloc_bytes = counter.bytes_allocated;
+        last_alloc_count = counter.allocs;
+        last_bytes_processed = b.bytes_processed;
+        last_is_container = b.is_container;
+        last_force_report = b.force_report_allocs;
+
+        if (last_is_container) break;
+        if (last_elapsed_ns >= opts.min_time_ns) break;
+        if (n >= opts.max_iters) break;
+
+        n = next_n(n, last_elapsed_ns, opts.min_time_ns, opts.max_iters);
+    }
+
+    const fn_n: f64 = @floatFromInt(n);
+    const ns_per_op: f64 = if (n == 0) 0 else @as(f64, @floatFromInt(last_elapsed_ns)) / fn_n;
+    const bytes_per_op: f64 = if (n == 0) 0 else @as(f64, @floatFromInt(last_alloc_bytes)) / fn_n;
+    const allocs_per_op: f64 = if (n == 0) 0 else @as(f64, @floatFromInt(last_alloc_count)) / fn_n;
+
+    const mb_per_sec: ?f64 = if (last_bytes_processed == 0 or last_elapsed_ns == 0)
+        null
+    else blk: {
+        const bytes_f: f64 = @floatFromInt(last_bytes_processed);
+        const elapsed_s: f64 = @as(f64, @floatFromInt(last_elapsed_ns)) / @as(f64, std.time.ns_per_s);
+        break :blk (bytes_f / (1024.0 * 1024.0)) / elapsed_s;
+    };
+
+    return .{
+        .name = name,
+        .n = n,
+        .elapsed_ns = last_elapsed_ns,
+        .ns_per_op = ns_per_op,
+        .bytes_per_op = bytes_per_op,
+        .allocs_per_op = allocs_per_op,
+        .mb_per_sec = mb_per_sec,
+        .force_report_allocs = last_force_report,
+        .is_container = last_is_container,
+    };
+}
+
+/// Pick the next iteration count. Strategy: predict an `n` that should land
+/// at `min_time_ns` based on the last sample, overshoot by 20 %, clamp to
+/// at most ×100 growth and at most `max_iters`, round up to a "nice" number.
+/// Rounding is applied to the clamped prediction, so the result may exceed
+/// ×100 growth slightly, but it is capped again so it never exceeds
+/// `max_iters`. All arithmetic saturates instead of overflowing.
+fn next_n(prev_n: u64, prev_elapsed_ns: u64, min_time_ns: u64, max_iters: u64) u64 {
+    const growth_cap = prev_n *| 100;
+    var predicted: u64 = if (prev_elapsed_ns == 0)
+        growth_cap // no usable timing sample: take the largest allowed step
+    else blk: {
+        // (min_time_ns * 1.2) * prev_n / prev_elapsed_ns, in integer math
+        const num = @as(u128, min_time_ns) *| 12 *| prev_n;
+        const denom: u128 = @as(u128, prev_elapsed_ns) *| 10;
+        break :blk std.math.cast(u64, num / denom) orelse std.math.maxInt(u64);
+    };
+
+    if (predicted <= prev_n) predicted = prev_n +| 1;
+    if (predicted > growth_cap) predicted = growth_cap;
+    if (predicted > max_iters) predicted = max_iters;
+    // `round_up` can overshoot a cap that is not itself a "nice" number.
+    return @min(round_up(predicted), max_iters);
+}
+
+/// Round up to a "nice" decimal number (1, 2, 3, 5, 10, 20, 30, 50, 100, ...).
+/// Matches Go testing's growth heuristic — avoids reporting awkward iteration
+/// counts like 1357 in favor of 2000.
+fn round_up(n: u64) u64 {
+    if (n <= 1) return 1;
+    var base: u64 = 1;
+    while (base *| 10 < n) base *|= 10;
+    if (n <= base) return base;
+    if (n <= 2 *| base) return 2 *| base;
+    if (n <= 3 *| base) return 3 *| base;
+    if (n <= 5 *| base) return 5 *| base;
+    return 10 *| base;
+}
+
+test "round_up snaps to nice numbers" {
+    try std.testing.expectEqual(@as(u64, 1), round_up(1));
+    try std.testing.expectEqual(@as(u64, 2), round_up(2));
+    try std.testing.expectEqual(@as(u64, 3), round_up(3));
+    try std.testing.expectEqual(@as(u64, 5), round_up(4));
+    try std.testing.expectEqual(@as(u64, 5), round_up(5));
+    try std.testing.expectEqual(@as(u64, 10), round_up(7));
+    try std.testing.expectEqual(@as(u64, 100), round_up(73));
+    try std.testing.expectEqual(@as(u64, 1000), round_up(999));
+    try std.testing.expectEqual(@as(u64, 2000), round_up(1234));
+    try std.testing.expectEqual(@as(u64, 1_000_000), round_up(1_000_000));
+}
+
+test "next_n grows toward target" {
+    // first run: 0 ns -> jump by 100x
+    try std.testing.expectEqual(@as(u64, 100), next_n(1, 0, std.time.ns_per_s, 1 << 30));
+
+    // 100 iters in 1 ms; target 1s -> predicted = 1.2e9 * 100 / 1e6 = 120000,
+    // capped at 100x growth -> 10000 (already a "nice" number)
+    const n2 = next_n(100, std.time.ns_per_ms, std.time.ns_per_s, 1 << 30);
+    try std.testing.expect(n2 > 100);
+    try std.testing.expect(n2 <= 100 * 100);
+}
+
+test "next_n never exceeds max_iters" {
+    // Clamped prediction is 1 << 30; rounding it up alone would give 2e9.
+    try std.testing.expectEqual(@as(u64, 1 << 30), next_n(1 << 29, 0, std.time.ns_per_s, 1 << 30));
+}
diff --git a/src/stats.zig b/src/stats.zig
new file mode 100644
index 0000000..1572bcb
--- /dev/null
+++ b/src/stats.zig
@@ -0,0 +1,66 @@
+const std = @import("std");
+
+/// Descriptive statistics over a set of samples, as computed by `summarize`.
+pub const Summary = struct {
+    /// Number of samples.
+    n: usize,
+    /// Smallest sample.
+    min: f64,
+    /// Arithmetic mean.
+    mean: f64,
+    /// Sample standard deviation (n - 1 denominator); 0 for fewer than two samples.
+    stddev: f64,
+};
+
+/// Compute count, minimum, mean and sample standard deviation of `samples`.
+/// An empty slice yields an all-zero `Summary`.
+pub fn summarize(samples: []const f64) Summary {
+    if (samples.len == 0) return .{ .n = 0, .min = 0, .mean = 0, .stddev = 0 };
+
+    var min: f64 = samples[0];
+    var sum: f64 = 0;
+    for (samples) |s| {
+        if (s < min) min = s;
+        sum += s;
+    }
+    const mean = sum / @as(f64, @floatFromInt(samples.len));
+
+    // Second pass: sum of squared deviations from the mean.
+    var var_sum: f64 = 0;
+    for (samples) |s| {
+        const d = s - mean;
+        var_sum += d * d;
+    }
+    // Bessel's correction; a single sample has no spread.
+    const variance = if (samples.len > 1)
+        var_sum / @as(f64, @floatFromInt(samples.len - 1))
+    else
+        0;
+
+    return .{
+        .n = samples.len,
+        .min = min,
+        .mean = mean,
+        .stddev = @sqrt(variance),
+    };
+}
+
+test "summarize empty" {
+    const s = summarize(&.{});
+    try std.testing.expectEqual(@as(usize, 0), s.n);
+}
+
+test "summarize single value" {
+    const s = summarize(&.{42.0});
+    try std.testing.expectEqual(@as(f64, 42), s.min);
+    try std.testing.expectEqual(@as(f64, 42), s.mean);
+    try std.testing.expectEqual(@as(f64, 0), s.stddev);
+}
+
+test "summarize multiple values" {
+    const s = summarize(&.{ 2, 4, 4, 4, 5, 5, 7, 9 });
+    try std.testing.expectEqual(@as(f64, 2), s.min);
+    try std.testing.expectEqual(@as(f64, 5), s.mean);
+    // sample stddev of [2,4,4,4,5,5,7,9] = sqrt(32/7) ≈ 2.138
+    try std.testing.expectApproxEqAbs(@as(f64, 2.1380899), s.stddev, 1e-5);
+}