feat: sub 1s

Andre Henriques 2025-04-06 00:24:15 +01:00
parent be0f2393ca
commit f8a8d4dbf2
4 changed files with 115 additions and 146 deletions

zig/.gitignore

@@ -1,5 +1,6 @@
 zig-out/
 zig-cache/
+.zig-cache/
 out.folded
 out.perf
 perf.data

zig/build.zig.zon (deleted)

@@ -1,72 +0,0 @@
-.{
-    // This is the default name used by packages depending on this one. For
-    // example, when a user runs `zig fetch --save <url>`, this field is used
-    // as the key in the `dependencies` table. Although the user can choose a
-    // different name, most users will stick with this provided value.
-    //
-    // It is redundant to include "zig" in this name because it is already
-    // within the Zig package namespace.
-    .name = "zig",
-    // This is a [Semantic Version](https://semver.org/).
-    // In a future version of Zig it will be used for package deduplication.
-    .version = "0.0.0",
-    // This field is optional.
-    // This is currently advisory only; Zig does not yet do anything
-    // with this value.
-    //.minimum_zig_version = "0.11.0",
-    // This field is optional.
-    // Each dependency must either provide a `url` and `hash`, or a `path`.
-    // `zig build --fetch` can be used to fetch all dependencies of a package, recursively.
-    // Once all dependencies are fetched, `zig build` no longer requires
-    // internet connectivity.
-    .dependencies = .{
-        // See `zig fetch --save <url>` for a command-line interface for adding dependencies.
-        //.example = .{
-        //    // When updating this field to a new URL, be sure to delete the corresponding
-        //    // `hash`, otherwise you are communicating that you expect to find the old hash at
-        //    // the new URL.
-        //    .url = "https://example.com/foo.tar.gz",
-        //
-        //    // This is computed from the file contents of the directory of files that is
-        //    // obtained after fetching `url` and applying the inclusion rules given by
-        //    // `paths`.
-        //    //
-        //    // This field is the source of truth; packages do not come from a `url`; they
-        //    // come from a `hash`. `url` is just one of many possible mirrors for how to
-        //    // obtain a package matching this `hash`.
-        //    //
-        //    // Uses the [multihash](https://multiformats.io/multihash/) format.
-        //    .hash = "...",
-        //
-        //    // When this is provided, the package is found in a directory relative to the
-        //    // build root. In this case the package's hash is irrelevant and therefore not
-        //    // computed. This field and `url` are mutually exclusive.
-        //    .path = "foo",
-        //    // When this is set to `true`, a package is declared to be lazily
-        //    // fetched. This makes the dependency only get fetched if it is
-        //    // actually used.
-        //    .lazy = false,
-        //},
-    },
-    // Specifies the set of files and directories that are included in this package.
-    // Only files and directories listed here are included in the `hash` that
-    // is computed for this package. Only files listed here will remain on disk
-    // when using the zig package manager. As a rule of thumb, one should list
-    // files required for compilation plus any license(s).
-    // Paths are relative to the build root. Use the empty string (`""`) to refer to
-    // the build root itself.
-    // A directory listed here means that all files within, recursively, are included.
-    .paths = .{
-        "build.zig",
-        "build.zig.zon",
-        "src",
-        // For example...
-        //"LICENSE",
-        //"README.md",
-    },
-}

(profiling shell script)

@@ -3,6 +3,7 @@
 rm all.svg perf.data out.perf out.folded
 zig build -Doptimize=ReleaseSafe
+# zig build -Doptimize=Debug
 perf record -a -g -- ./zig-out/bin/zig

zig/src/main.zig

@@ -2,56 +2,53 @@ const std = @import("std");
 const out = std.io.getStdOut().writer();
-const WORKER_SIZE = 64;
-const READ_SIZE = 8 * 1024;
+const WORKER_SIZE = 32;
+const READ_SIZE = 2048 * 2048; //2048 * 2048;
 const InfoToKeepTrack = struct {
-    min: f32,
-    max: f32,
-    mean: f64,
+    min: i32,
+    max: i32,
+    mean: i64,
     count: usize,
 };
 const DataHash = std.StringArrayHashMap(*InfoToKeepTrack);
-fn parseFloat(num: []const u8) f32 {
-    const mult: f32 = if (num[0] == '-') -1 else 1;
+fn parseFloat(num: []const u8) i32 {
+    const mult: i32 = if (num[0] == '-') -1 else 1;
     const n = num[if (num[0] == '-') 1 else 0..];
-    var nat: u32 = 0;
-    var sub: u32 = 0;
-    var sub_count: f32 = 1;
-    var e = false;
-    for (n) |i| {
-        if (i == '.') {
-            e = true;
-            continue;
-        }
-        if (e) {
-            sub *= 10;
-            sub += i - 48;
-            sub_count *= 10;
-        } else {
-            nat *= 10;
-            nat += i - 48;
-        }
-    }
-    return mult * @as(f32, @floatFromInt(nat)) + @as(f32, @floatFromInt(sub)) / sub_count;
-}
+    // x.x
+    if (n.len == 3) {
+        var v = @Vector(2, i32){ n[0], n[2] };
+        const v2 = @Vector(2, i32){ '0', '0' };
+        v = v - v2;
+        return mult * (v[0] * 10 + v[1]);
+        // xx.x
+    } else if (n.len == 4) {
+        const v = @Vector(3, i32){ @intCast(n[0]), @intCast(n[1]), @intCast(n[3]) };
+        const v2 = @Vector(3, i32){ '0', '0', '0' };
+        const v3 = @Vector(3, i32){ 100, 10, 1 };
+        const nv = (v - v2) * v3;
+        return mult * @as(i32, @reduce(.Add, nv));
+    } else {
+        unreachable;
+    }
+}
 fn processLines(alloc: std.mem.Allocator, hashmap: *DataHash, buf: []u8) void {
-    var iter = std.mem.split(u8, buf, "\n");
+    var iter = std.mem.splitScalar(u8, buf, '\n');
     while (iter.next()) |line| {
         if (line.len == 0) {
             continue;
         }
-        const index = std.mem.indexOf(u8, line, ";").?;
+        const index = std.mem.lastIndexOfScalar(u8, line, ';').?;
         const name = line[0..index];
-        //const number = std.fmt.parseFloat(f32, line[index + 1 ..]) catch unreachable;
         const number = parseFloat(line[index + 1 ..]);
         if (hashmap.get(name)) |v| {
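
The rewritten parseFloat drops the general digit loop: every value in this input carries exactly one decimal digit, so after the sign the slice is either "x.x" (3 bytes) or "xx.x" (4 bytes), and the result comes back as integer tenths via a couple of vector ops. A standalone sketch of the same idea (hypothetical names, not part of the commit):

const std = @import("std");

// Parses "x.x" / "xx.x" (optionally signed) into integer tenths,
// e.g. "12.3" -> 123, so the hot path never touches floats.
fn parseTenths(num: []const u8) i32 {
    const mult: i32 = if (num[0] == '-') -1 else 1;
    const n = num[if (num[0] == '-') 1 else 0..];
    if (n.len == 3) { // "x.x"
        const v = @Vector(2, i32){ n[0], n[2] } - @Vector(2, i32){ '0', '0' };
        return mult * (v[0] * 10 + v[1]);
    } else if (n.len == 4) { // "xx.x"
        const v = @Vector(3, i32){ n[0], n[1], n[3] } - @Vector(3, i32){ '0', '0', '0' };
        return mult * @reduce(.Add, v * @Vector(3, i32){ 100, 10, 1 });
    }
    unreachable;
}

test "parseTenths" {
    try std.testing.expectEqual(@as(i32, 123), parseTenths("12.3"));
    try std.testing.expectEqual(@as(i32, -57), parseTenths("-5.7"));
}

Storing tenths as i32/i64 also keeps min, max, and the mean sum in pure integer math until the final print.
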
@@ -59,28 +56,31 @@ fn processLines(alloc: std.mem.Allocator, hashmap: *DataHash, buf: []u8) void {
             value.count += 1;
             value.max = @max(value.max, number);
             value.min = @min(value.min, number);
-            value.mean += number;
+            value.mean += @intCast(number);
         } else {
             const new_info = alloc.create(InfoToKeepTrack) catch unreachable;
             new_info.count = 1;
             new_info.max = number;
             new_info.min = number;
-            new_info.mean = number;
-            const new_name = alloc.alloc(u8, name.len) catch unreachable;
-            std.mem.copyForwards(u8, new_name, name);
-            hashmap.put(new_name, new_info) catch unreachable;
+            new_info.mean = @intCast(number);
+            hashmap.putAssumeCapacity(name, new_info);
         }
     }
 }
-fn count_fn(buf: []u8, alloc: std.mem.Allocator, finish: *bool, ghash: *DataHash, ghash_mutex: *std.Thread.Mutex, working: *bool) void {
+fn count_fn(ptr: []u8, buf: []u64, finish: *bool, ghash: *DataHash, ghash_mutex: *std.Thread.Mutex, working: *bool) void {
+    var gpa = std.heap.GeneralPurposeAllocator(.{ .thread_safe = false }){};
+    defer _ = gpa.deinit();
+    const alloc = gpa.allocator();
     var internal_hash_map = DataHash.init(alloc);
     defer internal_hash_map.deinit();
+    internal_hash_map.ensureTotalCapacity(5000) catch unreachable;
     while (!finish.* or working.*) {
         if (working.*) {
-            const lastEnter = std.mem.lastIndexOf(u8, buf, "\n").? - 1;
-            processLines(alloc, &internal_hash_map, buf[0..lastEnter]);
+            processLines(alloc, &internal_hash_map, ptr[buf[0]..buf[1]]);
             working.* = false;
         } else {
             std.time.sleep(1);
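
Two things change inside count_fn: each worker now owns a GeneralPurposeAllocator with .thread_safe = false, so allocations never contend across threads, and the per-worker map reserves its capacity up front, letting inserts use putAssumeCapacity instead of the growing put. Keys are now slices into the shared mapping, which is why the old per-key copy (alloc.alloc plus copyForwards) can go away. A minimal sketch of the preallocated-map pattern (illustrative, not the commit's code):

const std = @import("std");

test "preallocated StringArrayHashMap with putAssumeCapacity" {
    // A non-thread-safe allocator is fine: each worker owns its own map.
    var gpa = std.heap.GeneralPurposeAllocator(.{ .thread_safe = false }){};
    defer _ = gpa.deinit();
    const alloc = gpa.allocator();

    var map = std.StringArrayHashMap(i32).init(alloc);
    defer map.deinit();
    // One up-front reservation; later inserts skip the grow-and-rehash path.
    try map.ensureTotalCapacity(5000);

    // No error union and no allocation; the key is a slice into existing memory.
    map.putAssumeCapacity("Lisbon", 123);
    try std.testing.expectEqual(@as(i32, 123), map.get("Lisbon").?);
}
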
@@ -88,29 +88,46 @@ fn count_fn(buf: []u8, alloc: std.mem.Allocator, finish: *bool, ghash: *DataHash
     }
     ghash_mutex.lock();
     for (internal_hash_map.keys(), internal_hash_map.values()) |k, iv| {
-        if (ghash.get(k)) |v| {
+        if (ghash.get(k)) |value| {
+            var v = value;
             v.max = @max(v.max, iv.max);
             v.min = @min(v.min, iv.min);
             v.mean = v.mean + iv.mean;
             v.count += iv.count;
-            alloc.destroy(iv);
-            alloc.free(k);
         } else {
-            ghash.put(k, iv) catch unreachable;
+            ghash.putAssumeCapacity(k, iv);
         }
     }
     ghash_mutex.unlock();
 }
+fn lessThan(_: void, lhs: []const u8, rhs: []const u8) bool {
+    return std.mem.order(u8, lhs, rhs) == .lt;
+}
 pub fn main() !void {
-    var gpa = std.heap.GeneralPurposeAllocator(.{ .thread_safe = true }){};
+    var gpa = std.heap.GeneralPurposeAllocator(.{ .thread_safe = false }){};
     defer _ = gpa.deinit();
     const alloc = gpa.allocator();
     var file = try std.fs.cwd().openFile("../measurements.txt", .{});
-    var buf: [WORKER_SIZE][READ_SIZE]u8 = undefined;
+    defer file.close();
+    const md = try file.metadata();
+    const size = md.size();
+    const ptr = try std.posix.mmap(
+        null,
+        size,
+        std.posix.PROT.READ,
+        .{ .TYPE = .SHARED },
+        file.handle,
+        0,
+    );
+    defer std.posix.munmap(ptr);
+    var buf: [WORKER_SIZE][2]u64 = undefined;
     var threads: [WORKER_SIZE]std.Thread = undefined;
     var threads_mux: [WORKER_SIZE]bool = undefined;
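
The structural change in main: instead of read()-ing fixed-size chunks into per-worker buffers and seeking back to the last newline, the whole file is mapped once and each worker receives a [start, end) offset pair into the mapping, so no bytes are copied. The same std.posix calls as above, condensed into a runnable sketch:

const std = @import("std");

pub fn main() !void {
    var file = try std.fs.cwd().openFile("../measurements.txt", .{});
    defer file.close();

    // Map the whole file read-only; the kernel pages it in on demand.
    const size = (try file.metadata()).size();
    const ptr = try std.posix.mmap(null, size, std.posix.PROT.READ, .{ .TYPE = .SHARED }, file.handle, 0);
    defer std.posix.munmap(ptr);

    // Workers then take slices like ptr[start..end]; nothing is read twice.
    std.debug.print("mapped {d} bytes\n", .{ptr.len});
}
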
@@ -120,41 +137,49 @@ pub fn main() !void {
     var count = DataHash.init(alloc);
     defer count.deinit();
+    try count.ensureTotalCapacity(5000);
     var mutex = std.Thread.Mutex{};
-    for (0..WORKER_SIZE) |i| {
-        buf[i] = std.mem.zeroes([READ_SIZE]u8);
-        threads_mux[i] = false;
-        threads[i] = try std.Thread.spawn(.{}, count_fn, .{ &buf[i], alloc, &finish, &count, &mutex, &threads_mux[i] });
-    }
     var round: usize = 0;
+    var reading_more_faster_than_mux: u64 = 0;
+    for (0..WORKER_SIZE) |i| {
+        buf[i] = std.mem.zeroes([2]u64);
+        threads_mux[i] = false;
+    }
+    var p: u64 = 0;
     while (true) {
-        if (!threads_mux[round]) {
-            const read = try file.read(&buf[round]);
-            if (read < READ_SIZE) {
-                mutex.lock();
-                processLines(alloc, &count, buf[round][0..read]);
-                mutex.unlock();
-                break;
-            }
-            threads_mux[round] = true;
-            const lastEnter = std.mem.lastIndexOf(u8, &buf[round], "\n").? + 1;
-            try file.seekTo(try file.getPos() - (READ_SIZE - lastEnter));
-        }
-        round = (round + 1) % WORKER_SIZE;
+        const end = p + READ_SIZE;
+        if (end > size) {
+            mutex.lock();
+            processLines(alloc, &count, ptr[p..]);
+            mutex.unlock();
+            break;
+        }
+        if (!threads_mux[round]) {
+            buf[round][0] = p;
+            p += std.mem.lastIndexOf(u8, ptr[p..end], "\n").? + 1;
+            buf[round][1] = p - 1;
+            threads_mux[round] = true;
+        } else {
+            if (reading_more_faster_than_mux == 0) {
+                for (0..WORKER_SIZE) |i| {
+                    threads[i] = try std.Thread.spawn(.{}, count_fn, .{ ptr, &buf[i], i, &finish, &count, &mutex, &threads_mux[i] });
+                }
+            }
+            reading_more_faster_than_mux += 1;
+        }
+        comptime {
+            if (WORKER_SIZE != 32) @compileError("ONLY WORKS FOR 32");
+        }
+        // Same as doing % 32
+        round = (round + 1) & 0b11111;
     }
+    blk: while (true) {
+        for (threads_mux) |b| {
+            if (b) {
+                std.time.sleep(1);
+                continue :blk;
+            }
+        }
+        break;
+    }
     finish = true;
     try out.print("Joining\n", .{});
@@ -165,11 +190,25 @@ pub fn main() !void {
try out.print("Done joining\n", .{}); try out.print("Done joining\n", .{});
for (count.keys(), count.values()) |key, item| { const toSort = try alloc.alloc([]const u8, count.keys().len);
try out.print("'{s}': {d:.2}/{d:.2}/{d:.2}\n", .{ key, item.min, item.mean / @as(f64, @floatFromInt(item.count)), item.max });
alloc.destroy(item); @memcpy(toSort, count.keys());
alloc.free(key);
try out.print("Start sorting\n", .{});
std.mem.sort([]const u8, toSort, {}, lessThan);
try out.print("Finished sorting\n", .{});
for (toSort) |key| {
const item = count.get(key).?;
try out.print("{s}={d:.1}/{d:.1}/{d:.1}; ", .{
key,
@as(f64, @floatFromInt(item.min)) / 10.0,
@as(f64, @floatFromInt(item.mean)) / @as(f64, @floatFromInt(item.count)) / 10.0,
@as(f64, @floatFromInt(item.max)) / 10.0,
});
} }
try out.print("\n", .{});
// try out.print("\n r > t: {}\n", .{reading_more_faster_than_mux});
//try out.print("Read {d} lines\n", .{count}); //try out.print("Read {d} lines\n", .{count});
} }
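
Since min, max, and the running mean sum are stored as integer tenths (the mean as an i64 sum), the report converts back to one-decimal values only at print time: divide the sum by the count, then by 10. Worked through on two samples (illustrative):

const std = @import("std");

test "mean of fixed-point tenths" {
    // "-5.7" and "12.3" parse to -57 and 123 tenths.
    const sum: i64 = -57 + 123;
    const count: usize = 2;
    const mean = @as(f64, @floatFromInt(sum)) / @as(f64, @floatFromInt(count)) / 10.0;
    try std.testing.expectApproxEqAbs(@as(f64, 3.3), mean, 0.0001);
}

The keys are copied into toSort and ordered with the new lessThan comparator, so stations print alphabetically in the usual 1BRC "name=min/mean/max; " format.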