commit be0f2393ca632198cb78a445306098394789e827
Author: Andre Henriques
Date:   Fri Jun 28 09:13:36 2024 +0100

    Initial Commit + Zig implementation

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..42d9bad
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+measurements.txt
+measurements_1m.txt
diff --git a/zig/.gitignore b/zig/.gitignore
new file mode 100644
index 0000000..bf40676
--- /dev/null
+++ b/zig/.gitignore
@@ -0,0 +1,6 @@
+zig-out/
+zig-cache/
+out.folded
+out.perf
+perf.data
+*.svg
diff --git a/zig/build.zig b/zig/build.zig
new file mode 100644
index 0000000..ac231f0
--- /dev/null
+++ b/zig/build.zig
@@ -0,0 +1,66 @@
+const std = @import("std");
+
+/// Declares the build graph: the `zig` executable (installed by default), a
+/// `run` step that executes it, and a `test` step for the unit tests in
+/// `src/main.zig`. An external build runner executes the resulting graph.
+pub fn build(b: *std.Build) void {
+    // Standard target options allow the person running `zig build` to choose
+    // what target to build for. Here we do not override the defaults, which
+    // means any target is allowed, and the default is native. Other options
+    // for restricting supported target set are available.
+    const target = b.standardTargetOptions(.{});
+
+    // Standard optimization options allow the person running `zig build` to select
+    // between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall. Here we do not
+    // set a preferred release mode, allowing the user to decide how to optimize.
+    const optimize = b.standardOptimizeOption(.{});
+
+    const exe = b.addExecutable(.{
+        .name = "zig",
+        .root_source_file = b.path("src/main.zig"),
+        .target = target,
+        .optimize = optimize,
+    });
+
+    // This declares intent for the executable to be installed into the
+    // standard location when the user invokes the "install" step (the default
+    // step when running `zig build`).
+    b.installArtifact(exe);
+
+    // This *creates* a Run step in the build graph, to be executed when another
+    // step is evaluated that depends on it. The next line below will establish
+    // such a dependency.
+    const run_cmd = b.addRunArtifact(exe);
+
+    // By making the run step depend on the install step, it will be run from the
+    // installation directory rather than directly from within the cache directory.
+    // This is not necessary, however, if the application depends on other installed
+    // files, this ensures they will be present and in the expected location.
+    run_cmd.step.dependOn(b.getInstallStep());
+
+    // This allows the user to pass arguments to the application in the build
+    // command itself, like this: `zig build run -- arg1 arg2 etc`
+    if (b.args) |args| {
+        run_cmd.addArgs(args);
+    }
+
+    // This creates a build step. It will be visible in the `zig build --help` menu,
+    // and can be selected like this: `zig build run`
+    // This will evaluate the `run` step rather than the default, which is "install".
+    const run_step = b.step("run", "Run the app");
+    run_step.dependOn(&run_cmd.step);
+
+    const exe_unit_tests = b.addTest(.{
+        .root_source_file = b.path("src/main.zig"),
+        .target = target,
+        .optimize = optimize,
+    });
+
+    const run_exe_unit_tests = b.addRunArtifact(exe_unit_tests);
+
+    // Similar to creating the run step earlier, this exposes a `test` step to
+    // the `zig build --help` menu, providing a way for the user to request
+    // running the unit tests.
+    const test_step = b.step("test", "Run unit tests");
+    test_step.dependOn(&run_exe_unit_tests.step);
+}
diff --git a/zig/build.zig.zon b/zig/build.zig.zon
new file mode 100644
index 0000000..11575f5
--- /dev/null
+++ b/zig/build.zig.zon
@@ -0,0 +1,72 @@
+.{
+    // This is the default name used by packages depending on this one. For
+    // example, when a user runs `zig fetch --save URL`, this field is used
+    // as the key in the `dependencies` table. Although the user can choose a
+    // different name, most users will stick with this provided value.
+    //
+    // It is redundant to include "zig" in this name because it is already
+    // within the Zig package namespace.
+    .name = "zig",
+
+    // This is a [Semantic Version](https://semver.org/).
+    // In a future version of Zig it will be used for package deduplication.
+    .version = "0.0.0",
+
+    // This field is optional.
+    // This is currently advisory only; Zig does not yet do anything
+    // with this value.
+    //.minimum_zig_version = "0.11.0",
+
+    // This field is optional.
+    // Each dependency must either provide a `url` and `hash`, or a `path`.
+    // `zig build --fetch` can be used to fetch all dependencies of a package, recursively.
+    // Once all dependencies are fetched, `zig build` no longer requires
+    // internet connectivity.
+    .dependencies = .{
+        // See `zig fetch --save URL` for a command-line interface for adding dependencies.
+        //.example = .{
+        //    // When updating this field to a new URL, be sure to delete the corresponding
+        //    // `hash`, otherwise you are communicating that you expect to find the old hash at
+        //    // the new URL.
+        //    .url = "https://example.com/foo.tar.gz",
+        //
+        //    // This is computed from the file contents of the directory of files that is
+        //    // obtained after fetching `url` and applying the inclusion rules given by
+        //    // `paths`.
+        //    //
+        //    // This field is the source of truth; packages do not come from a `url`; they
+        //    // come from a `hash`. `url` is just one of many possible mirrors for how to
+        //    // obtain a package matching this `hash`.
+        //    //
+        //    // Uses the [multihash](https://multiformats.io/multihash/) format.
+        //    .hash = "...",
+        //
+        //    // When this is provided, the package is found in a directory relative to the
+        //    // build root. In this case the package's hash is irrelevant and therefore not
+        //    // computed. This field and `url` are mutually exclusive.
+        //    .path = "foo",
+
+        //    // When this is set to `true`, a package is declared to be lazily
+        //    // fetched.
This makes the dependency only get fetched if it is
+        //    // actually used.
+        //    .lazy = false,
+        //},
+    },
+
+    // Specifies the set of files and directories that are included in this package.
+    // Only files and directories listed here are included in the `hash` that
+    // is computed for this package. Only files listed here will remain on disk
+    // when using the zig package manager. As a rule of thumb, one should list
+    // files required for compilation plus any license(s).
+    // Paths are relative to the build root. Use the empty string (`""`) to refer to
+    // the build root itself.
+    // A directory listed here means that all files within, recursively, are included.
+    .paths = .{
+        "build.zig",
+        "build.zig.zon",
+        "src",
+        // For example...
+        //"LICENSE",
+        //"README.md",
+    },
+}
diff --git a/zig/get_perf.sh b/zig/get_perf.sh
new file mode 100755
index 0000000..4009e2e
--- /dev/null
+++ b/zig/get_perf.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+rm all.svg perf.data out.perf out.folded
+
+zig build -Doptimize=ReleaseSafe
+
+perf record -a -g -- ./zig-out/bin/zig
+
+perf script > out.perf
+
+stackcollapse-perf.pl out.perf > out.folded
+
+flamegraph.pl out.folded > all.svg
+
+firefox all.svg
diff --git a/zig/src/main.zig b/zig/src/main.zig
new file mode 100644
index 0000000..a11b0a1
--- /dev/null
+++ b/zig/src/main.zig
@@ -0,0 +1,205 @@
+const std = @import("std");
+
+const out = std.io.getStdOut().writer();
+
+/// Number of worker threads; each one is paired with one read buffer.
+const WORKER_SIZE = 64;
+
+/// Size in bytes of each worker's read buffer.
+const READ_SIZE = 8 * 1024;
+
+/// Running statistics for one name.
+const InfoToKeepTrack = struct {
+    min: f32,
+    max: f32,
+    /// Running sum of every sample; divided by `count` only when printing.
+    mean: f64,
+    count: usize,
+};
+
+/// Maps a name to its heap-allocated statistics. The map does not copy keys,
+/// so every key stored here is a separately allocated copy of the name.
+const DataHash = std.StringArrayHashMap(*InfoToKeepTrack);
+
+/// Parses a decimal such as "12.3" or "-0.5": an optional leading '-', then
+/// ASCII digits with an optional '.'. Bytes are not validated, so the caller
+/// must pass well-formed input. Empty input yields 0.
+fn parseFloat(num: []const u8) f32 {
+    if (num.len == 0) return 0;
+
+    const negative = num[0] == '-';
+    const digits = if (negative) num[1..] else num;
+
+    var nat: u32 = 0;
+    var sub: u32 = 0;
+    var sub_count: f32 = 1;
+    var in_fraction = false;
+
+    for (digits) |c| {
+        if (c == '.') {
+            in_fraction = true;
+            continue;
+        }
+        if (in_fraction) {
+            sub = sub * 10 + (c - '0');
+            sub_count *= 10;
+        } else {
+            nat = nat * 10 + (c - '0');
+        }
+    }
+
+    const magnitude = @as(f32, @floatFromInt(nat)) + @as(f32, @floatFromInt(sub)) / sub_count;
+    // The sign covers the fraction too: "-1.5" is -(1 + 0.5), not -1 + 0.5.
+    // `0 - x` rather than `-x` keeps "-0.0" from turning into negative zero.
+    return if (negative) 0 - magnitude else magnitude;
+}
+
+/// Folds every non-empty `name;value` line of `buf` into `hashmap`. Names seen
+/// for the first time are copied with `alloc`, because `name` is only a view
+/// into `buf`. Panics on a line without ';' or when allocation fails.
+fn processLines(alloc: std.mem.Allocator, hashmap: *DataHash, buf: []u8) void {
+    var lines = std.mem.splitScalar(u8, buf, '\n');
+    while (lines.next()) |line| {
+        if (line.len == 0) continue;
+
+        const sep = std.mem.indexOfScalar(u8, line, ';') orelse @panic("malformed line: missing ';'");
+        const name = line[0..sep];
+        const number = parseFloat(line[sep + 1 ..]);
+
+        const entry = hashmap.getOrPut(name) catch @panic("out of memory");
+        if (entry.found_existing) {
+            const info = entry.value_ptr.*;
+            info.count += 1;
+            info.max = @max(info.max, number);
+            info.min = @min(info.min, number);
+            info.mean += number;
+        } else {
+            // The new slot's key still points into `buf`; swap in an owned copy.
+            entry.key_ptr.* = alloc.dupe(u8, name) catch @panic("out of memory");
+            const info = alloc.create(InfoToKeepTrack) catch @panic("out of memory");
+            info.* = .{ .min = number, .max = number, .mean = number, .count = 1 };
+            entry.value_ptr.* = info;
+        }
+    }
+}
+
+/// Worker loop. Whenever `working` is set, folds the complete lines of `buf`
+/// into a thread-local map and clears the flag; exits once `finish` is set
+/// and no work is pending, then merges its map into `ghash` under
+/// `ghash_mutex`. Both flags are shared with the main thread, so they are
+/// only touched through atomic loads and stores.
+fn count_fn(buf: []u8, alloc: std.mem.Allocator, finish: *bool, ghash: *DataHash, ghash_mutex: *std.Thread.Mutex, working: *bool) void {
+    var local = DataHash.init(alloc);
+    defer local.deinit();
+
+    while (true) {
+        if (@atomicLoad(bool, working, .acquire)) {
+            // Everything before the last newline is whole lines. The slice
+            // end is exclusive, so the index itself already leaves the
+            // newline out; subtracting one more would drop the last digit.
+            const last_newline = std.mem.lastIndexOfScalar(u8, buf, '\n').?;
+            processLines(alloc, &local, buf[0..last_newline]);
+            @atomicStore(bool, working, false, .release);
+        } else if (@atomicLoad(bool, finish, .acquire)) {
+            break;
+        } else {
+            std.time.sleep(1);
+        }
+    }
+
+    ghash_mutex.lock();
+    defer ghash_mutex.unlock();
+    for (local.keys(), local.values()) |name, partial| {
+        if (ghash.get(name)) |total| {
+            total.max = @max(total.max, partial.max);
+            total.min = @min(total.min, partial.min);
+            total.mean += partial.mean;
+            total.count += partial.count;
+            // Already merged into the existing entry: release our copy.
+            alloc.destroy(partial);
+            alloc.free(name);
+        } else {
+            // First sighting: the global map takes over key and value.
+            ghash.put(name, partial) catch @panic("out of memory");
+        }
+    }
+}
+
+/// Reads `../measurements.txt` in READ_SIZE chunks, fans the chunks out to
+/// WORKER_SIZE threads, and prints min/mean/max per name once all workers
+/// have merged their results.
+pub fn main() !void {
+    var gpa = std.heap.GeneralPurposeAllocator(.{ .thread_safe = true }){};
+    defer _ = gpa.deinit();
+
+    const alloc = gpa.allocator();
+
+    const file = try std.fs.cwd().openFile("../measurements.txt", .{});
+    defer file.close();
+
+    var buf: [WORKER_SIZE][READ_SIZE]u8 = undefined;
+
+    var threads: [WORKER_SIZE]std.Thread = undefined;
+    var threads_mux: [WORKER_SIZE]bool = undefined;
+
+    var finish = false;
+
+    var count = DataHash.init(alloc);
+    defer count.deinit();
+
+    var mutex = std.Thread.Mutex{};
+
+    for (0..WORKER_SIZE) |i| {
+        buf[i] = std.mem.zeroes([READ_SIZE]u8);
+        threads_mux[i] = false;
+        threads[i] = try std.Thread.spawn(.{}, count_fn, .{ &buf[i], alloc, &finish, &count, &mutex, &threads_mux[i] });
+    }
+
+    // Hand chunks to idle workers round-robin until the file is exhausted.
+    var round: usize = 0;
+    while (true) : (round = (round + 1) % WORKER_SIZE) {
+        if (@atomicLoad(bool, &threads_mux[round], .acquire)) continue;
+
+        // readAll only comes back short at end of file.
+        const read = try file.readAll(&buf[round]);
+        if (read < READ_SIZE) {
+            // Final partial chunk: fold it straight into the global map.
+            mutex.lock();
+            processLines(alloc, &count, buf[round][0..read]);
+            mutex.unlock();
+            break;
+        }
+
+        // Rewind to just after the last complete line so the next chunk
+        // starts on a line boundary, then publish the buffer to its worker.
+        const last_newline = std.mem.lastIndexOfScalar(u8, &buf[round], '\n') orelse return error.LineTooLong;
+        try file.seekTo(try file.getPos() - (READ_SIZE - (last_newline + 1)));
+        @atomicStore(bool, &threads_mux[round], true, .release);
+    }
+
+    // Wait until every worker has drained its buffer.
+    blk: while (true) {
+        for (&threads_mux) |*busy| {
+            if (@atomicLoad(bool, busy, .acquire)) {
+                std.time.sleep(1);
+                continue :blk;
+            }
+        }
+        break;
+    }
+    @atomicStore(bool, &finish, true, .release);
+
+    try out.print("Joining\n", .{});
+
+    for (0..WORKER_SIZE) |i| {
+        threads[i].join();
+    }
+
+    try out.print("Done joining\n", .{});
+
+    for (count.keys(), count.values()) |key, item| {
+        try out.print("'{s}': {d:.2}/{d:.2}/{d:.2}\n", .{ key, item.min, item.mean / @as(f64, @floatFromInt(item.count)), item.max });
+        alloc.destroy(item);
+        alloc.free(key);
+    }
+}