From f8a8d4dbf2ec665a297026dab8a26d38393e1400 Mon Sep 17 00:00:00 2001
From: Andre Henriques
Date: Sun, 6 Apr 2025 00:24:15 +0100
Subject: [PATCH] feat: sub 1s

---
 zig/.gitignore    |   1 +
 zig/build.zig.zon |  72 ------------------
 zig/get_perf.sh   |   1 +
 zig/src/main.zig  | 187 ++++++++++++++++++++++++++++------------------
 4 files changed, 115 insertions(+), 146 deletions(-)
 delete mode 100644 zig/build.zig.zon

diff --git a/zig/.gitignore b/zig/.gitignore
index bf40676..946ccfb 100644
--- a/zig/.gitignore
+++ b/zig/.gitignore
@@ -1,5 +1,6 @@
 zig-out/
 zig-cache/
+.zig-cache/
 out.folded
 out.perf
 perf.data
diff --git a/zig/build.zig.zon b/zig/build.zig.zon
deleted file mode 100644
index 11575f5..0000000
--- a/zig/build.zig.zon
+++ /dev/null
@@ -1,72 +0,0 @@
-.{
-    // This is the default name used by packages depending on this one. For
-    // example, when a user runs `zig fetch --save <url>`, this field is used
-    // as the key in the `dependencies` table. Although the user can choose a
-    // different name, most users will stick with this provided value.
-    //
-    // It is redundant to include "zig" in this name because it is already
-    // within the Zig package namespace.
-    .name = "zig",
-
-    // This is a [Semantic Version](https://semver.org/).
-    // In a future version of Zig it will be used for package deduplication.
-    .version = "0.0.0",
-
-    // This field is optional.
-    // This is currently advisory only; Zig does not yet do anything
-    // with this value.
-    //.minimum_zig_version = "0.11.0",
-
-    // This field is optional.
-    // Each dependency must either provide a `url` and `hash`, or a `path`.
-    // `zig build --fetch` can be used to fetch all dependencies of a package, recursively.
-    // Once all dependencies are fetched, `zig build` no longer requires
-    // internet connectivity.
-    .dependencies = .{
-        // See `zig fetch --save <url>` for a command-line interface for adding dependencies.
-        //.example = .{
-        //    // When updating this field to a new URL, be sure to delete the corresponding
-        //    // `hash`, otherwise you are communicating that you expect to find the old hash at
-        //    // the new URL.
-        //    .url = "https://example.com/foo.tar.gz",
-        //
-        //    // This is computed from the file contents of the directory of files that is
-        //    // obtained after fetching `url` and applying the inclusion rules given by
-        //    // `paths`.
-        //    //
-        //    // This field is the source of truth; packages do not come from a `url`; they
-        //    // come from a `hash`. `url` is just one of many possible mirrors for how to
-        //    // obtain a package matching this `hash`.
-        //    //
-        //    // Uses the [multihash](https://multiformats.io/multihash/) format.
-        //    .hash = "...",
-
-        //    // When this is provided, the package is found in a directory relative to the
-        //    // build root. In this case the package's hash is irrelevant and therefore not
-        //    // computed. This field and `url` are mutually exclusive.
-        //    .path = "foo",
-
-        //    // When this is set to `true`, a package is declared to be lazily
-        //    // fetched. This makes the dependency only get fetched if it is
-        //    // actually used.
-        //    .lazy = false,
-        //},
-    },
-
-    // Specifies the set of files and directories that are included in this package.
-    // Only files and directories listed here are included in the `hash` that
-    // is computed for this package. Only files listed here will remain on disk
-    // when using the zig package manager. As a rule of thumb, one should list
-    // files required for compilation plus any license(s).
-    // Paths are relative to the build root. Use the empty string (`""`) to refer to
-    // the build root itself.
-    // A directory listed here means that all files within, recursively, are included.
-    .paths = .{
-        "build.zig",
-        "build.zig.zon",
-        "src",
-        // For example...
-        //"LICENSE",
-        //"README.md",
-    },
-}
diff --git a/zig/get_perf.sh b/zig/get_perf.sh
index 4009e2e..a527602 100755
--- a/zig/get_perf.sh
+++ b/zig/get_perf.sh
@@ -3,6 +3,7 @@
 rm all.svg perf.data out.perf out.folded
 
 zig build -Doptimize=ReleaseSafe
+# zig build -Doptimize=Debug
 
 perf record -a -g -- ./zig-out/bin/zig
 
diff --git a/zig/src/main.zig b/zig/src/main.zig
index a11b0a1..8da96ae 100644
--- a/zig/src/main.zig
+++ b/zig/src/main.zig
@@ -2,56 +2,53 @@ const std = @import("std");
 
 const out = std.io.getStdOut().writer();
 
-const WORKER_SIZE = 64;
+const WORKER_SIZE = 32;
 
-const READ_SIZE = 8 * 1024;
+const READ_SIZE = 2048 * 2048;
 
 const InfoToKeepTrack = struct {
-    min: f32,
-    max: f32,
-    mean: f64,
+    min: i32,
+    max: i32,
+    mean: i64,
     count: usize,
 };
 
 const DataHash = std.StringArrayHashMap(*InfoToKeepTrack);
 
-fn parseFloat(num: []const u8) f32 {
-    const mult: f32 = if (num[0] == '-') -1 else 1;
+fn parseFloat(num: []const u8) i32 {
+    const mult: i32 = if (num[0] == '-') -1 else 1;
     const n = num[if (num[0] == '-') 1 else 0..];
 
-    var nat: u32 = 0;
-    var sub: u32 = 0;
-    var sub_count: f32 = 1;
-    var e = false;
-    for (n) |i| {
-        if (i == '.') {
-            e = true;
-            continue;
-        }
-        if (e) {
-            sub *= 10;
-            sub += i - 48;
-            sub_count *= 10;
-        } else {
-            nat *= 10;
-            nat += i - 48;
-        }
+    // x.x
+    if (n.len == 3) {
+        var v = @Vector(2, i32){ n[0], n[2] };
+        const v2 = @Vector(2, i32){ '0', '0' };
+
+        v = v - v2;
+        return mult * (v[0] * 10 + v[1]);
+        // xx.x
+    } else if (n.len == 4) {
+        const v = @Vector(3, i32){ @intCast(n[0]), @intCast(n[1]), @intCast(n[3]) };
+        const v2 = @Vector(3, i32){ '0', '0', '0' };
+        const v3 = @Vector(3, i32){ 100, 10, 1 };
+
+        const nv = (v - v2) * v3;
+        return mult * @as(i32, @reduce(.Add, nv));
+    } else {
+        unreachable;
     }
-
-    return mult * @as(f32, @floatFromInt(nat)) + @as(f32, @floatFromInt(sub)) / sub_count;
 }
 
 fn processLines(alloc: std.mem.Allocator, hashmap: *DataHash, buf: []u8) void {
-    var iter = std.mem.split(u8, buf, "\n");
+    var iter = std.mem.splitScalar(u8, buf, '\n');
 
     while (iter.next()) |line| {
         if (line.len == 0) {
             continue;
         }
-        const index = std.mem.indexOf(u8, line, ";").?;
+        const index = std.mem.lastIndexOfScalar(u8, line, ';').?;
 
         const name = line[0..index];
-        //const number = std.fmt.parseFloat(f32, line[index + 1 ..]) catch unreachable;
         const number = parseFloat(line[index + 1 ..]);
 
         if (hashmap.get(name)) |v| {
@@ -59,28 +56,31 @@ fn processLines(alloc: std.mem.Allocator, hashmap: *DataHash, buf: []u8) void {
             value.count += 1;
             value.max = @max(value.max, number);
             value.min = @min(value.min, number);
-            value.mean += number;
+            value.mean += @intCast(number);
         } else {
             const new_info = alloc.create(InfoToKeepTrack) catch unreachable;
             new_info.count = 1;
             new_info.max = number;
             new_info.min = number;
-            new_info.mean = number;
-            const new_name = alloc.alloc(u8, name.len) catch unreachable;
-            std.mem.copyForwards(u8, new_name, name);
-            hashmap.put(new_name, new_info) catch unreachable;
+            new_info.mean = @intCast(number);
+            hashmap.putAssumeCapacity(name, new_info);
         }
     }
 }
 
-fn count_fn(buf: []u8, alloc: std.mem.Allocator, finish: *bool, ghash: *DataHash, ghash_mutex: *std.Thread.Mutex, working: *bool) void {
+fn count_fn(ptr: []u8, buf: []u64, finish: *bool, ghash: *DataHash, ghash_mutex: *std.Thread.Mutex, working: *bool) void {
+    var gpa = std.heap.GeneralPurposeAllocator(.{ .thread_safe = false }){};
+    defer _ = gpa.deinit();
+
+    const alloc = gpa.allocator();
+
     var internal_hash_map = DataHash.init(alloc);
     defer internal_hash_map.deinit();
+    internal_hash_map.ensureTotalCapacity(5000) catch unreachable;
 
     while (!finish.* or working.*) {
         if (working.*) {
-            const lastEnter = std.mem.lastIndexOf(u8, buf, "\n").? - 1;
-            processLines(alloc, &internal_hash_map, buf[0..lastEnter]);
+            processLines(alloc, &internal_hash_map, ptr[buf[0]..buf[1]]);
             working.* = false;
         } else {
            std.time.sleep(1);
@@ -88,29 +88,46 @@
     }
 
     ghash_mutex.lock();
     for (internal_hash_map.keys(), internal_hash_map.values()) |k, iv| {
-        if (ghash.get(k)) |v| {
+        if (ghash.get(k)) |value| {
+            var v = value;
             v.max = @max(v.max, iv.max);
             v.min = @min(v.min, iv.min);
             v.mean = v.mean + iv.mean;
             v.count += iv.count;
-            alloc.destroy(iv);
-            alloc.free(k);
         } else {
-            ghash.put(k, iv) catch unreachable;
+            ghash.putAssumeCapacity(k, iv);
         }
     }
     ghash_mutex.unlock();
 }
 
+fn lessThan(_: void, lhs: []const u8, rhs: []const u8) bool {
+    return std.mem.order(u8, lhs, rhs) == .lt;
+}
+
 pub fn main() !void {
-    var gpa = std.heap.GeneralPurposeAllocator(.{ .thread_safe = true }){};
+    var gpa = std.heap.GeneralPurposeAllocator(.{ .thread_safe = false }){};
     defer _ = gpa.deinit();
 
     const alloc = gpa.allocator();
 
     var file = try std.fs.cwd().openFile("../measurements.txt", .{});
+    defer file.close();
 
-    var buf: [WORKER_SIZE][READ_SIZE]u8 = undefined;
+    const md = try file.metadata();
+    const size = md.size();
+
+    const ptr = try std.posix.mmap(
+        null,
+        size,
+        std.posix.PROT.READ,
+        .{ .TYPE = .SHARED },
+        file.handle,
+        0,
+    );
+    defer std.posix.munmap(ptr);
+
+    var buf: [WORKER_SIZE][2]u64 = undefined;
 
     var threads: [WORKER_SIZE]std.Thread = undefined;
     var threads_mux: [WORKER_SIZE]bool = undefined;
@@ -120,41 +137,50 @@ pub fn main() !void {
     var count = DataHash.init(alloc);
     defer count.deinit();
 
-    var mutex = std.Thread.Mutex{};
+    try count.ensureTotalCapacity(5000);
 
-    for (0..WORKER_SIZE) |i| {
-        buf[i] = std.mem.zeroes([READ_SIZE]u8);
-        threads_mux[i] = false;
-        threads[i] = try std.Thread.spawn(.{}, count_fn, .{ &buf[i], alloc, &finish, &count, &mutex, &threads_mux[i] });
-    }
+    var mutex = std.Thread.Mutex{};
 
     var round: usize = 0;
 
-    while (true) {
-        if (!threads_mux[round]) {
-            const read = try file.read(&buf[round]);
-            if (read < READ_SIZE) {
-                mutex.lock();
-                processLines(alloc, &count, buf[round][0..read]);
-                mutex.unlock();
-                break;
-            }
-            threads_mux[round] = true;
-            const lastEnter = std.mem.lastIndexOf(u8, &buf[round], "\n").? + 1;
-            try file.seekTo(try file.getPos() - (READ_SIZE - lastEnter));
-        }
-        round = (round + 1) % WORKER_SIZE;
+    var reading_more_faster_than_mux: u64 = 0;
+
+    for (0..WORKER_SIZE) |i| {
+        buf[i] = std.mem.zeroes([2]u64);
+        threads_mux[i] = false;
     }
 
-    blk: while (true) {
-        for (threads_mux) |b| {
-            if (b) {
-                std.time.sleep(1);
-                continue :blk;
-            }
+    var p: u64 = 0;
+
+    while (true) {
+        const end = p + READ_SIZE;
+        if (end > size) {
+            mutex.lock();
+            processLines(alloc, &count, ptr[p..]);
+            mutex.unlock();
+            break;
         }
-        break;
+        if (!threads_mux[round]) {
+            buf[round][0] = p;
+            p += std.mem.lastIndexOf(u8, ptr[p..end], "\n").? + 1;
+            buf[round][1] = p - 1;
+            threads_mux[round] = true;
+        } else {
+            // Workers are spawned lazily, the first time the reader finds a busy slot.
+            if (reading_more_faster_than_mux == 0) {
+                for (0..WORKER_SIZE) |i| {
+                    threads[i] = try std.Thread.spawn(.{}, count_fn, .{ ptr, &buf[i], &finish, &count, &mutex, &threads_mux[i] });
+                }
+            }
+            reading_more_faster_than_mux += 1;
+        }
+        comptime {
+            if (WORKER_SIZE != 32) @compileError("ONLY WORKS FOR 32");
+        }
+        // Same as doing % 32
+        round = (round + 1) & 0b11111;
     }
+
     finish = true;
 
     try out.print("Joining\n", .{});
@@ -165,11 +190,26 @@ pub fn main() !void {
     try out.print("Done joining\n", .{});
 
-    for (count.keys(), count.values()) |key, item| {
-        try out.print("'{s}': {d:.2}/{d:.2}/{d:.2}\n", .{ key, item.min, item.mean / @as(f64, @floatFromInt(item.count)), item.max });
-        alloc.destroy(item);
-        alloc.free(key);
+    const toSort = try alloc.alloc([]const u8, count.keys().len);
+    defer alloc.free(toSort);
+
+    @memcpy(toSort, count.keys());
+
+    try out.print("Start sorting\n", .{});
+    std.mem.sort([]const u8, toSort, {}, lessThan);
+    try out.print("Finished sorting\n", .{});
+
+    for (toSort) |key| {
+        const item = count.get(key).?;
+        try out.print("{s}={d:.1}/{d:.1}/{d:.1}; ", .{
+            key,
+            @as(f64, @floatFromInt(item.min)) / 10.0,
+            @as(f64, @floatFromInt(item.mean)) / @as(f64, @floatFromInt(item.count)) / 10.0,
+            @as(f64, @floatFromInt(item.max)) / 10.0,
+        });
     }
+    try out.print("\n", .{});
+    // try out.print("\n r > t: {}\n", .{reading_more_faster_than_mux});
     //try out.print("Read {d} lines\n", .{count});
 }
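
Note: after this patch, temperatures are carried end to end as integer tenths (i32); `parseFloat` maps "-12.3" to -123, and only the final print divides by 10.0. Below is a minimal standalone sketch of the same scheme, using a hypothetical scalar `parseTenths` helper rather than the patch's vectorized version. It assumes Zig 0.12+ std and an input format with exactly one decimal digit and one or two integer digits, which is what the length-3/length-4 branches above rely on.

const std = @import("std");

// Hypothetical scalar equivalent of the patch's vectorized parser:
// "-12.3" -> -123, i.e. degrees stored as integer tenths.
fn parseTenths(num: []const u8) i32 {
    const neg = num[0] == '-';
    const n = num[@intFromBool(neg)..];
    const sign: i32 = if (neg) -1 else 1;
    return switch (n.len) {
        // "x.x": one integer digit, a dot, one decimal digit.
        3 => sign * (@as(i32, n[0] - '0') * 10 + (n[2] - '0')),
        // "xx.x": two integer digits, a dot, one decimal digit.
        4 => sign * (@as(i32, n[0] - '0') * 100 + @as(i32, n[1] - '0') * 10 + (n[3] - '0')),
        else => unreachable,
    };
}

test "temperatures round-trip as integer tenths" {
    try std.testing.expectEqual(@as(i32, 123), parseTenths("12.3"));
    try std.testing.expectEqual(@as(i32, -57), parseTenths("-5.7"));
    try std.testing.expectEqual(@as(i32, 9), parseTenths("0.9"));
    // The report divides by 10.0 at print time, as main() does after this patch.
    try std.testing.expectEqual(@as(f64, 12.3), @as(f64, @floatFromInt(parseTenths("12.3"))) / 10.0);
}

Keeping min/max/mean in integer registers avoids a float conversion per line; the one division per station at print time is negligible next to a billion parses.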