Initial Commit + Zig implementation

2024-06-28 09:13:36 +01:00 · 2024-06-28 09:13:36 +01:00 · be0f2393ca
commit be0f2393ca
6 changed files with 336 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,2 @@
+measurements.txt
+measurements_1m.txt
--- a/zig/.gitignore
+++ b/zig/.gitignore
@ -0,0 +1,6 @@
+zig-out/
+zig-cache/
+out.folded
+out.perf
+perf.data
+*.svg
--- a/zig/build.zig
+++ b/zig/build.zig
@ -0,0 +1,66 @@
+const std = @import("std");
+
+/// Describes the build graph for this package. Nothing is compiled while this
+/// function runs; an external runner executes the steps declared here.
+pub fn build(b: *std.Build) void {
+    // Target and optimization mode are left entirely to whoever invokes
+    // `zig build`: no default is overridden and no target is excluded.
+    const target = b.standardTargetOptions(.{});
+    const optimize = b.standardOptimizeOption(.{});
+
+    // The application binary. Registering it as an install artifact makes it
+    // part of the default "install" step.
+    const app = b.addExecutable(.{
+        .name = "zig",
+        .root_source_file = b.path("src/main.zig"),
+        .target = target,
+        .optimize = optimize,
+    });
+    b.installArtifact(app);
+
+    // `zig build run [-- arg1 arg2 ...]`: launch the installed binary. The
+    // dependency on the install step makes the program run from the install
+    // directory instead of the cache directory.
+    const launch = b.addRunArtifact(app);
+    launch.step.dependOn(b.getInstallStep());
+    if (b.args) |forwarded| launch.addArgs(forwarded);
+    b.step("run", "Run the app").dependOn(&launch.step);
+
+    // `zig build test`: compile and run the test blocks rooted at
+    // `src/main.zig`.
+    const unit_tests = b.addTest(.{
+        .root_source_file = b.path("src/main.zig"),
+        .target = target,
+        .optimize = optimize,
+    });
+    const run_unit_tests = b.addRunArtifact(unit_tests);
+    b.step("test", "Run unit tests").dependOn(&run_unit_tests.step);
+}
--- a/zig/build.zig.zon
+++ b/zig/build.zig.zon
@ -0,0 +1,72 @@
+.{
+    // Package manifest that accompanies `build.zig` in this directory.
+    //
+    // This is the default name used by packages depending on this one. For
+    // example, when a user runs `zig fetch --save <url>`, this field is used
+    // as the key in the `dependencies` table. Although the user can choose a
+    // different name, most users will stick with this provided value.
+    //
+    // It is redundant to include "zig" in this name because it is already
+    // within the Zig package namespace.
+    //
+    // NOTE(review): the value below is exactly "zig", which the paragraph
+    // above advises against; consider a more descriptive package name.
+    .name = "zig",
+
+    // This is a [Semantic Version](https://semver.org/).
+    // In a future version of Zig it will be used for package deduplication.
+    .version = "0.0.0",
+
+    // This field is optional.
+    // This is currently advisory only; Zig does not yet do anything
+    // with this value.
+    //.minimum_zig_version = "0.11.0",
+
+    // This field is optional.
+    // Each dependency must either provide a `url` and `hash`, or a `path`.
+    // `zig build --fetch` can be used to fetch all dependencies of a package, recursively.
+    // Once all dependencies are fetched, `zig build` no longer requires
+    // internet connectivity.
+    //
+    // No dependencies are declared here; the only entry is the commented-out
+    // example below.
+    .dependencies = .{
+        // See `zig fetch --save <url>` for a command-line interface for adding dependencies.
+        //.example = .{
+        //    // When updating this field to a new URL, be sure to delete the corresponding
+        //    // `hash`, otherwise you are communicating that you expect to find the old hash at
+        //    // the new URL.
+        //    .url = "https://example.com/foo.tar.gz",
+        //
+        //    // This is computed from the file contents of the directory of files that is
+        //    // obtained after fetching `url` and applying the inclusion rules given by
+        //    // `paths`.
+        //    //
+        //    // This field is the source of truth; packages do not come from a `url`; they
+        //    // come from a `hash`. `url` is just one of many possible mirrors for how to
+        //    // obtain a package matching this `hash`.
+        //    //
+        //    // Uses the [multihash](https://multiformats.io/multihash/) format.
+        //    .hash = "...",
+        //
+        //    // When this is provided, the package is found in a directory relative to the
+        //    // build root. In this case the package's hash is irrelevant and therefore not
+        //    // computed. This field and `url` are mutually exclusive.
+        //    .path = "foo",
+
+        //    // When this is set to `true`, a package is declared to be lazily
+        //    // fetched. This makes the dependency only get fetched if it is
+        //    // actually used.
+        //    .lazy = false,
+        //},
+    },
+
+    // Specifies the set of files and directories that are included in this package.
+    // Only files and directories listed here are included in the `hash` that
+    // is computed for this package. Only files listed here will remain on disk
+    // when using the zig package manager. As a rule of thumb, one should list
+    // files required for compilation plus any license(s).
+    // Paths are relative to the build root. Use the empty string (`""`) to refer to
+    // the build root itself.
+    // A directory listed here means that all files within, recursively, are included.
+    .paths = .{
+        "build.zig",
+        "build.zig.zon",
+        "src",
+        // For example...
+        //"LICENSE",
+        //"README.md",
+    },
+}
--- a/zig/get_perf.sh
+++ b/zig/get_perf.sh
@ -0,0 +1,15 @@
+#!/bin/bash
+
+rm all.svg perf.data out.perf out.folded
+
+zig build -Doptimize=ReleaseSafe
+
+perf record -a -g -- ./zig-out/bin/zig
+
+perf script > out.perf
+
+stackcollapse-perf.pl out.perf > out.folded
+
+flamegraph.pl out.folded > all.svg
+
+firefox all.svg
--- a/zig/src/main.zig
+++ b/zig/src/main.zig
@ -0,0 +1,175 @@
+const std = @import("std");
+
+/// Writer for the process's standard output; `main` prints everything through it.
+const out = std.io.getStdOut().writer();
+
+/// Number of worker threads spawned by `main`; each one gets its own read buffer.
+const WORKER_SIZE = 64;
+
+/// Size in bytes of each worker buffer, and therefore of each file read in `main`.
+const READ_SIZE = 8 * 1024;
+
+/// Running statistics for one name (the text before `;` on an input line).
+const InfoToKeepTrack = struct {
+    /// Smallest value seen so far.
+    min: f32,
+    /// Largest value seen so far.
+    max: f32,
+    /// Running *sum* of all values, despite the name; `main` divides it by
+    /// `count` when printing to obtain the actual mean.
+    mean: f64,
+    /// Number of values folded into this record.
+    count: usize,
+};
+
+/// Map from name to a heap-allocated statistics record. Both the key bytes and
+/// the records are allocated by the code that inserts them (`processLines`)
+/// and are released explicitly by `count_fn` / `main`, not by the map's `deinit`.
+const DataHash = std.StringArrayHashMap(*InfoToKeepTrack);
+
+/// Parses a plain decimal number of the form `[-]digits[.digits]` into an `f32`.
+///
+/// This is a fast path for trusted input: `num` must be non-empty and, apart
+/// from an optional leading `-` and a `.`, contain only ASCII digits. No
+/// exponent, `+` sign or whitespace is accepted.
+fn parseFloat(num: []const u8) f32 {
+    const negative = num[0] == '-';
+    const digits = if (negative) num[1..] else num;
+
+    var whole: u32 = 0;
+    var frac: u32 = 0;
+    // Power of ten matching the number of fractional digits consumed so far.
+    var frac_scale: f32 = 1;
+    var seen_point = false;
+
+    for (digits) |c| {
+        if (c == '.') {
+            seen_point = true;
+            continue;
+        }
+        const digit: u32 = c - '0';
+        if (seen_point) {
+            frac = frac * 10 + digit;
+            frac_scale *= 10;
+        } else {
+            whole = whole * 10 + digit;
+        }
+    }
+
+    // The sign must cover the whole magnitude. Negating only the integer part
+    // would turn "-1.5" into -0.5 and "-0.5" into +0.5.
+    const magnitude = @as(f32, @floatFromInt(whole)) + @as(f32, @floatFromInt(frac)) / frac_scale;
+    return if (negative) -magnitude else magnitude;
+}
+
+/// Folds every `name;value` line of `buf` into `hashmap`.
+///
+/// Lines are separated by `\n`; empty lines are skipped. For a name that is
+/// already present the existing record is updated in place. For a new name a
+/// record and a private copy of the name are allocated with `alloc`, and
+/// ownership of both passes to the map's owner (who must free them).
+///
+/// The function cannot return an error, so malformed input (a line without
+/// `;`) and allocation failure abort with a panic. That is well-defined in
+/// every build mode, unlike `unreachable`.
+fn processLines(alloc: std.mem.Allocator, hashmap: *DataHash, buf: []u8) void {
+    var lines = std.mem.splitScalar(u8, buf, '\n');
+    while (lines.next()) |line| {
+        if (line.len == 0) continue;
+
+        const sep = std.mem.indexOfScalar(u8, line, ';') orelse
+            @panic("processLines: line without ';' separator");
+        const name = line[0..sep];
+        const number = parseFloat(line[sep + 1 ..]);
+
+        // Single lookup that either finds the record or reserves a slot.
+        const slot = hashmap.getOrPut(name) catch @panic("processLines: out of memory");
+        if (slot.found_existing) {
+            const info = slot.value_ptr.*;
+            info.count += 1;
+            info.max = @max(info.max, number);
+            info.min = @min(info.min, number);
+            info.mean += number;
+        } else {
+            // The reserved key still points into `buf`, which the caller will
+            // overwrite; replace it with an owned copy before moving on.
+            slot.key_ptr.* = alloc.dupe(u8, name) catch @panic("processLines: out of memory");
+            const info = alloc.create(InfoToKeepTrack) catch @panic("processLines: out of memory");
+            info.* = .{ .min = number, .max = number, .mean = number, .count = 1 };
+            slot.value_ptr.* = info;
+        }
+    }
+}
+
+/// Worker thread body.
+///
+/// Waits until the producer raises `working`, aggregates every complete line
+/// of `buf` into a thread-local map, lowers `working` again, and repeats until
+/// `finish` is set while no work is pending. On exit the local results are
+/// merged into `ghash` while holding `ghash_mutex`.
+///
+/// `working` and `finish` are written by another thread, so they are only
+/// accessed through atomic loads/stores here.
+///
+/// Ownership: a local record whose name already exists in `ghash` is folded
+/// into the existing entry and freed (record and key). Otherwise the record
+/// and key are handed over to `ghash`.
+fn count_fn(buf: []u8, alloc: std.mem.Allocator, finish: *bool, ghash: *DataHash, ghash_mutex: *std.Thread.Mutex, working: *bool) void {
+    var local = DataHash.init(alloc);
+    defer local.deinit();
+
+    while (true) {
+        if (@atomicLoad(bool, working, .acquire)) {
+            // Process everything up to, but not including, the last newline.
+            // The bytes after it are a partial line that the producer reads
+            // again into a later buffer. Cutting one byte earlier would drop
+            // the final character of the last complete line.
+            if (std.mem.lastIndexOfScalar(u8, buf, '\n')) |last_newline| {
+                processLines(alloc, &local, buf[0..last_newline]);
+            }
+            @atomicStore(bool, working, false, .release);
+        } else if (@atomicLoad(bool, finish, .acquire)) {
+            break;
+        } else {
+            std.time.sleep(1);
+        }
+    }
+
+    ghash_mutex.lock();
+    defer ghash_mutex.unlock();
+
+    for (local.keys(), local.values()) |name, mine| {
+        if (ghash.get(name)) |total| {
+            total.max = @max(total.max, mine.max);
+            total.min = @min(total.min, mine.min);
+            total.mean += mine.mean;
+            total.count += mine.count;
+            alloc.destroy(mine);
+            alloc.free(name);
+        } else {
+            ghash.put(name, mine) catch @panic("count_fn: out of memory");
+        }
+    }
+}
+
+/// Reads `../measurements.txt` in `READ_SIZE` chunks, hands each chunk to one
+/// of `WORKER_SIZE` worker threads in round-robin order, and finally prints
+/// `min/mean/max` per name to standard output.
+///
+/// Each chunk is cut at its last newline; the file position is rewound so the
+/// trailing partial line is read again at the start of the next chunk. The
+/// final, shorter chunk is processed on this thread.
+///
+/// Returns `error.LineTooLong` if a full chunk contains no newline, plus any
+/// file, thread-spawn or output error.
+pub fn main() !void {
+    var gpa = std.heap.GeneralPurposeAllocator(.{ .thread_safe = true }){};
+    defer _ = gpa.deinit();
+
+    const alloc = gpa.allocator();
+
+    var file = try std.fs.cwd().openFile("../measurements.txt", .{});
+    defer file.close();
+
+    var buf: [WORKER_SIZE][READ_SIZE]u8 = undefined;
+
+    var threads: [WORKER_SIZE]std.Thread = undefined;
+    // One "buffer is full, worker should process it" flag per worker. Shared
+    // with the worker threads, hence only touched atomically after spawn.
+    var threads_mux: [WORKER_SIZE]bool = undefined;
+
+    var finish = false;
+
+    var count = DataHash.init(alloc);
+    defer count.deinit();
+
+    var mutex = std.Thread.Mutex{};
+
+    for (0..WORKER_SIZE) |i| {
+        buf[i] = std.mem.zeroes([READ_SIZE]u8);
+        threads_mux[i] = false;
+        threads[i] = try std.Thread.spawn(.{}, count_fn, .{ &buf[i], alloc, &finish, &count, &mutex, &threads_mux[i] });
+    }
+
+    var round: usize = 0;
+    while (true) : (round = (round + 1) % WORKER_SIZE) {
+        // Skip workers that are still busy with their previous chunk.
+        if (@atomicLoad(bool, &threads_mux[round], .acquire)) continue;
+
+        // `readAll` only returns fewer bytes than requested at end of file,
+        // so a short count reliably marks the last chunk.
+        const read = try file.readAll(&buf[round]);
+        if (read < READ_SIZE) {
+            mutex.lock();
+            processLines(alloc, &count, buf[round][0..read]);
+            mutex.unlock();
+            break;
+        }
+
+        const last_newline = std.mem.lastIndexOfScalar(u8, &buf[round], '\n') orelse
+            return error.LineTooLong;
+        const line_end = last_newline + 1;
+        // Rewind so the partial line after the last newline is read again.
+        try file.seekTo(try file.getPos() - (READ_SIZE - line_end));
+
+        // Hand the buffer over only after this thread is done looking at it.
+        @atomicStore(bool, &threads_mux[round], true, .release);
+    }
+
+    // Wait until every worker has drained its buffer before asking them to stop.
+    wait: while (true) {
+        for (&threads_mux) |*busy| {
+            if (@atomicLoad(bool, busy, .acquire)) {
+                std.time.sleep(1);
+                continue :wait;
+            }
+        }
+        break;
+    }
+    @atomicStore(bool, &finish, true, .release);
+
+    try out.print("Joining\n", .{});
+
+    for (0..WORKER_SIZE) |i| {
+        threads[i].join();
+    }
+
+    try out.print("Done joining\n", .{});
+
+    // `mean` holds the running sum; divide by the count to print the mean.
+    // Records and keys are owned by this map, so release them as we go.
+    for (count.keys(), count.values()) |key, item| {
+        try out.print("'{s}': {d:.2}/{d:.2}/{d:.2}\n", .{ key, item.min, item.mean / @as(f64, @floatFromInt(item.count)), item.max });
+        alloc.destroy(item);
+        alloc.free(key);
+    }
+}