feat: sub 1s

Andre Henriques 2025-04-06 00:24:15 +01:00
parent be0f2393ca
commit f8a8d4dbf2
4 changed files with 115 additions and 146 deletions

zig/.gitignore vendored

@@ -1,5 +1,6 @@
zig-out/
zig-cache/
.zig-cache/
out.folded
out.perf
perf.data

zig/build.zig.zon

@@ -1,72 +0,0 @@
.{
// This is the default name used by packages depending on this one. For
// example, when a user runs `zig fetch --save <url>`, this field is used
// as the key in the `dependencies` table. Although the user can choose a
// different name, most users will stick with this provided value.
//
// It is redundant to include "zig" in this name because it is already
// within the Zig package namespace.
.name = "zig",
// This is a [Semantic Version](https://semver.org/).
// In a future version of Zig it will be used for package deduplication.
.version = "0.0.0",
// This field is optional.
// This is currently advisory only; Zig does not yet do anything
// with this value.
//.minimum_zig_version = "0.11.0",
// This field is optional.
// Each dependency must either provide a `url` and `hash`, or a `path`.
// `zig build --fetch` can be used to fetch all dependencies of a package, recursively.
// Once all dependencies are fetched, `zig build` no longer requires
// internet connectivity.
.dependencies = .{
// See `zig fetch --save <url>` for a command-line interface for adding dependencies.
//.example = .{
// // When updating this field to a new URL, be sure to delete the corresponding
// // `hash`, otherwise you are communicating that you expect to find the old hash at
// // the new URL.
// .url = "https://example.com/foo.tar.gz",
//
// // This is computed from the file contents of the directory of files that is
// // obtained after fetching `url` and applying the inclusion rules given by
// // `paths`.
// //
// // This field is the source of truth; packages do not come from a `url`; they
// // come from a `hash`. `url` is just one of many possible mirrors for how to
// // obtain a package matching this `hash`.
// //
// // Uses the [multihash](https://multiformats.io/multihash/) format.
// .hash = "...",
//
// // When this is provided, the package is found in a directory relative to the
// // build root. In this case the package's hash is irrelevant and therefore not
// // computed. This field and `url` are mutually exclusive.
// .path = "foo",
// // When this is set to `true`, a package is declared to be lazily
// // fetched. This makes the dependency only get fetched if it is
// // actually used.
// .lazy = false,
//},
},
// Specifies the set of files and directories that are included in this package.
// Only files and directories listed here are included in the `hash` that
// is computed for this package. Only files listed here will remain on disk
// when using the zig package manager. As a rule of thumb, one should list
// files required for compilation plus any license(s).
// Paths are relative to the build root. Use the empty string (`""`) to refer to
// the build root itself.
// A directory listed here means that all files within, recursively, are included.
.paths = .{
"build.zig",
"build.zig.zon",
"src",
// For example...
//"LICENSE",
//"README.md",
},
}


@@ -3,6 +3,7 @@
rm all.svg perf.data out.perf out.folded
zig build -Doptimize=ReleaseSafe
# zig build -Doptimize=Debug
perf record -a -g -- ./zig-out/bin/zig
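# (This hunk shows only part of the script.) Given the artifacts removed above
# (all.svg, out.perf, out.folded), the remaining lines likely feed the recording
# through Brendan Gregg's FlameGraph tools, roughly:
#   perf script > out.perf
#   ./stackcollapse-perf.pl out.perf > out.folded
#   ./flamegraph.pl out.folded > all.svg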

zig/src/main.zig

@@ -2,56 +2,53 @@ const std = @import("std");
const out = std.io.getStdOut().writer();
const WORKER_SIZE = 64;
const WORKER_SIZE = 32;
const READ_SIZE = 8 * 1024;
const READ_SIZE = 2048 * 2048; //2048 * 2048;
const InfoToKeepTrack = struct {
min: f32,
max: f32,
mean: f64,
min: i32,
max: i32,
mean: i64,
count: usize,
};
const DataHash = std.StringArrayHashMap(*InfoToKeepTrack);
fn parseFloat(num: []const u8) f32 {
const mult: f32 = if (num[0] == '-') -1 else 1;
fn parseFloat(num: []const u8) i32 {
const mult: i32 = if (num[0] == '-') -1 else 1;
const n = num[if (num[0] == '-') 1 else 0..];
var nat: u32 = 0;
var sub: u32 = 0;
var sub_count: f32 = 1;
var e = false;
for (n) |i| {
if (i == '.') {
e = true;
continue;
}
if (e) {
sub *= 10;
sub += i - 48;
sub_count *= 10;
} else {
nat *= 10;
nat += i - 48;
}
// x.x
if (n.len == 3) {
var v = @Vector(2, i32){ n[0], n[2] };
const v2 = @Vector(2, i32){ '0', '0' };
v = v - v2;
return mult * (v[0] * 10 + v[1]);
// xx.x
} else if (n.len == 4) {
const v = @Vector(3, i32){ @intCast(n[0]), @intCast(n[1]), @intCast(n[3]) };
const v2 = @Vector(3, i32){ '0', '0', '0' };
const v3 = @Vector(3, i32){ 100, 10, 1 };
const nv = (v - v2) * v3;
return mult * @as(i32, @reduce(.Add, nv));
} else {
unreachable;
}
return mult * @as(f32, @floatFromInt(nat)) + @as(f32, @floatFromInt(sub)) / sub_count;
}
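Despite its name, the rewritten parseFloat now returns temperatures as integer tenths rather than floats, exploiting the fixed "x.x"/"xx.x" measurement format. A quick sanity check of that contract (illustrative only, not part of the commit):

test "parseFloat returns integer tenths" {
    // assumes every value has exactly one fractional digit
    try std.testing.expectEqual(@as(i32, 123), parseFloat("12.3"));
    try std.testing.expectEqual(@as(i32, -54), parseFloat("-5.4"));
}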
fn processLines(alloc: std.mem.Allocator, hashmap: *DataHash, buf: []u8) void {
var iter = std.mem.split(u8, buf, "\n");
var iter = std.mem.splitScalar(u8, buf, '\n');
while (iter.next()) |line| {
if (line.len == 0) {
continue;
}
const index = std.mem.indexOf(u8, line, ";").?;
const index = std.mem.lastIndexOfScalar(u8, line, ';').?;
const name = line[0..index];
//const number = std.fmt.parseFloat(f32, line[index + 1 ..]) catch unreachable;
const number = parseFloat(line[index + 1 ..]);
if (hashmap.get(name)) |v| {
@@ -59,28 +56,31 @@ fn processLines(alloc: std.mem.Allocator, hashmap: *DataHash, buf: []u8) void {
value.count += 1;
value.max = @max(value.max, number);
value.min = @min(value.min, number);
value.mean += number;
value.mean += @intCast(number);
} else {
const new_info = alloc.create(InfoToKeepTrack) catch unreachable;
new_info.count = 1;
new_info.max = number;
new_info.min = number;
new_info.mean = number;
const new_name = alloc.alloc(u8, name.len) catch unreachable;
std.mem.copyForwards(u8, new_name, name);
hashmap.put(new_name, new_info) catch unreachable;
new_info.mean = @intCast(number);
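// Keys now borrow directly from the mmapped file (introduced below in main),
// so the previous alloc/copyForwards/free of the name slice is no longer needed.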
hashmap.putAssumeCapacity(name, new_info);
}
}
}
fn count_fn(buf: []u8, alloc: std.mem.Allocator, finish: *bool, ghash: *DataHash, ghash_mutex: *std.Thread.Mutex, working: *bool) void {
fn count_fn(ptr: []u8, buf: []u64, finish: *bool, ghash: *DataHash, ghash_mutex: *std.Thread.Mutex, working: *bool) void {
var gpa = std.heap.GeneralPurposeAllocator(.{ .thread_safe = false }){};
defer _ = gpa.deinit();
const alloc = gpa.allocator();
var internal_hash_map = DataHash.init(alloc);
defer internal_hash_map.deinit();
internal_hash_map.ensureTotalCapacity(5000) catch unreachable;
while (!finish.* or working.*) {
if (working.*) {
const lastEnter = std.mem.lastIndexOf(u8, buf, "\n").? - 1;
processLines(alloc, &internal_hash_map, buf[0..lastEnter]);
processLines(alloc, &internal_hash_map, ptr[buf[0]..buf[1]]);
working.* = false;
} else {
std.time.sleep(1);
@@ -88,29 +88,46 @@ fn count_fn(buf: []u8, alloc: std.mem.Allocator, finish: *bool, ghash: *DataHash
}
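// Worker loop has exited: fold this thread's private map into the shared
// global map exactly once, under the global mutex.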
ghash_mutex.lock();
for (internal_hash_map.keys(), internal_hash_map.values()) |k, iv| {
if (ghash.get(k)) |v| {
if (ghash.get(k)) |value| {
var v = value;
v.max = @max(v.max, iv.max);
v.min = @min(v.min, iv.min);
v.mean = v.mean + iv.mean;
v.count += iv.count;
alloc.destroy(iv);
alloc.free(k);
} else {
ghash.put(k, iv) catch unreachable;
ghash.putAssumeCapacity(k, iv);
}
}
ghash_mutex.unlock();
}
fn lessThan(_: void, lhs: []const u8, rhs: []const u8) bool {
return std.mem.order(u8, lhs, rhs) == .lt;
}
pub fn main() !void {
var gpa = std.heap.GeneralPurposeAllocator(.{ .thread_safe = true }){};
var gpa = std.heap.GeneralPurposeAllocator(.{ .thread_safe = false }){};
defer _ = gpa.deinit();
const alloc = gpa.allocator();
var file = try std.fs.cwd().openFile("../measurements.txt", .{});
defer file.close();
var buf: [WORKER_SIZE][READ_SIZE]u8 = undefined;
const md = try file.metadata();
const size = md.size();
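// Map the entire input file read-only; workers receive [start, end) byte
// offsets into this mapping instead of copies of the data.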
const ptr = try std.posix.mmap(
null,
size,
std.posix.PROT.READ,
.{ .TYPE = .SHARED },
file.handle,
0,
);
defer std.posix.munmap(ptr);
var buf: [WORKER_SIZE][2]u64 = undefined;
var threads: [WORKER_SIZE]std.Thread = undefined;
var threads_mux: [WORKER_SIZE]bool = undefined;
@@ -120,41 +137,49 @@ pub fn main() !void {
var count = DataHash.init(alloc);
defer count.deinit();
var mutex = std.Thread.Mutex{};
try count.ensureTotalCapacity(5000);
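// The putAssumeCapacity calls are only safe while the number of distinct
// station names stays under this pre-reserved capacity.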
for (0..WORKER_SIZE) |i| {
buf[i] = std.mem.zeroes([READ_SIZE]u8);
threads_mux[i] = false;
threads[i] = try std.Thread.spawn(.{}, count_fn, .{ &buf[i], alloc, &finish, &count, &mutex, &threads_mux[i] });
}
var mutex = std.Thread.Mutex{};
var round: usize = 0;
while (true) {
if (!threads_mux[round]) {
const read = try file.read(&buf[round]);
if (read < READ_SIZE) {
mutex.lock();
processLines(alloc, &count, buf[round][0..read]);
mutex.unlock();
break;
}
threads_mux[round] = true;
const lastEnter = std.mem.lastIndexOf(u8, &buf[round], "\n").? + 1;
try file.seekTo(try file.getPos() - (READ_SIZE - lastEnter));
}
round = (round + 1) % WORKER_SIZE;
var reading_more_faster_than_mux: u64 = 0;
for (0..WORKER_SIZE) |i| {
buf[i] = std.mem.zeroes([2]u64);
threads_mux[i] = false;
}
blk: while (true) {
for (threads_mux) |b| {
if (b) {
std.time.sleep(1);
continue :blk;
}
var p: u64 = 0;
while (true) {
const end = p + READ_SIZE;
if (end > size) {
mutex.lock();
processLines(alloc, &count, ptr[p..]);
mutex.unlock();
break;
}
break;
if (!threads_mux[round]) {
buf[round][0] = p;
p += std.mem.lastIndexOf(u8, ptr[p..end], "\n").? + 1;
buf[round][1] = p - 1;
threads_mux[round] = true;
} else {
if (reading_more_faster_than_mux == 0) {
for (0..WORKER_SIZE) |i| {
threads[i] = try std.Thread.spawn(.{}, count_fn, .{ ptr, &buf[i], i, &finish, &count, &mutex, &threads_mux[i] });
}
}
reading_more_faster_than_mux += 1;
}
comptime {
if (WORKER_SIZE != 32) @compileError("ONLY WORKS FOR 32");
}
// Same as doing % 32
round = (round + 1) & 0b11111;
}
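The masked increment works because WORKER_SIZE is a power of two, hence the comptime guard above. A throwaway check of the identity (not in the commit):

test "bitmask matches modulo for a power-of-two worker count" {
    // x % 32 == x & 0b11111 holds only because 32 is a power of two
    var x: usize = 0;
    while (x < 100) : (x += 1) {
        try std.testing.expectEqual(x % 32, x & 0b11111);
    }
}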
finish = true;
try out.print("Joining\n", .{});
@@ -165,11 +190,25 @@ pub fn main() !void {
try out.print("Done joining\n", .{});
for (count.keys(), count.values()) |key, item| {
try out.print("'{s}': {d:.2}/{d:.2}/{d:.2}\n", .{ key, item.min, item.mean / @as(f64, @floatFromInt(item.count)), item.max });
alloc.destroy(item);
alloc.free(key);
const toSort = try alloc.alloc([]const u8, count.keys().len);
@memcpy(toSort, count.keys());
try out.print("Start sorting\n", .{});
std.mem.sort([]const u8, toSort, {}, lessThan);
try out.print("Finished sorting\n", .{});
for (toSort) |key| {
const item = count.get(key).?;
try out.print("{s}={d:.1}/{d:.1}/{d:.1}; ", .{
key,
@as(f64, @floatFromInt(item.min)) / 10.0,
@as(f64, @floatFromInt(item.mean)) / @as(f64, @floatFromInt(item.count)) / 10.0,
@as(f64, @floatFromInt(item.max)) / 10.0,
});
}
try out.print("\n", .{});
// try out.print("\n r > t: {}\n", .{reading_more_faster_than_mux});
//try out.print("Read {d} lines\n", .{count});
}
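For illustration only: with the stored integer tenths, a station whose readings spanned -5.4 to 12.3 with mean 3.5 would print as `Hamburg=-5.4/3.5/12.3; ` (hypothetical name and data); the `/ 10.0` divisions in the final loop convert the tenths back to degrees with one decimal place.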