Initial Commit + Zig implementation

This commit is contained in:
Andre Henriques 2024-06-28 09:13:36 +01:00
commit be0f2393ca
6 changed files with 336 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
measurements.txt
measurements_1m.txt

6
zig/.gitignore vendored Normal file
View File

@ -0,0 +1,6 @@
zig-out/
zig-cache/
out.folded
out.perf
perf.data
*.svg

66
zig/build.zig Normal file
View File

@ -0,0 +1,66 @@
const std = @import("std");
// Although this function looks imperative, note that its job is to
// declaratively construct a build graph that will be executed by an external
// runner.
pub fn build(b: *std.Build) void {
    // Target and optimization mode come straight from the standard
    // `zig build` options; no defaults are overridden, so the user picks
    // both (native target unless told otherwise).
    const build_target = b.standardTargetOptions(.{});
    const build_mode = b.standardOptimizeOption(.{});

    // The application binary, rooted at src/main.zig.
    const app = b.addExecutable(.{
        .name = "zig",
        .root_source_file = b.path("src/main.zig"),
        .target = build_target,
        .optimize = build_mode,
    });

    // Install the binary as part of the default "install" step.
    b.installArtifact(app);

    // `zig build run`: execute the binary after it has been installed, so it
    // runs from the installation directory rather than from the cache.
    const run_app = b.addRunArtifact(app);
    run_app.step.dependOn(b.getInstallStep());

    // Forward anything after `--` on the command line to the application,
    // e.g. `zig build run -- arg1 arg2`.
    if (b.args) |forwarded_args| run_app.addArgs(forwarded_args);

    b.step("run", "Run the app").dependOn(&run_app.step);

    // `zig build test`: build and run the unit tests found in src/main.zig.
    const unit_tests = b.addTest(.{
        .root_source_file = b.path("src/main.zig"),
        .target = build_target,
        .optimize = build_mode,
    });
    const run_unit_tests = b.addRunArtifact(unit_tests);

    b.step("test", "Run unit tests").dependOn(&run_unit_tests.step);
}

72
zig/build.zig.zon Normal file
View File

@ -0,0 +1,72 @@
// Package manifest for this project: declares the package name and version,
// an (empty) dependency table, and the paths that make up the package.
.{
// This is the default name used by packages depending on this one. For
// example, when a user runs `zig fetch --save <url>`, this field is used
// as the key in the `dependencies` table. Although the user can choose a
// different name, most users will stick with this provided value.
//
// It is redundant to include "zig" in this name because it is already
// within the Zig package namespace.
.name = "zig",
// This is a [Semantic Version](https://semver.org/).
// In a future version of Zig it will be used for package deduplication.
.version = "0.0.0",
// This field is optional.
// This is currently advisory only; Zig does not yet do anything
// with this value.
//.minimum_zig_version = "0.11.0",
// This field is optional.
// Each dependency must either provide a `url` and `hash`, or a `path`.
// `zig build --fetch` can be used to fetch all dependencies of a package, recursively.
// Once all dependencies are fetched, `zig build` no longer requires
// internet connectivity.
.dependencies = .{
// See `zig fetch --save <url>` for a command-line interface for adding dependencies.
//.example = .{
// // When updating this field to a new URL, be sure to delete the corresponding
// // `hash`, otherwise you are communicating that you expect to find the old hash at
// // the new URL.
// .url = "https://example.com/foo.tar.gz",
//
// // This is computed from the file contents of the directory of files that is
// // obtained after fetching `url` and applying the inclusion rules given by
// // `paths`.
// //
// // This field is the source of truth; packages do not come from a `url`; they
// // come from a `hash`. `url` is just one of many possible mirrors for how to
// // obtain a package matching this `hash`.
// //
// // Uses the [multihash](https://multiformats.io/multihash/) format.
// .hash = "...",
//
// // When this is provided, the package is found in a directory relative to the
// // build root. In this case the package's hash is irrelevant and therefore not
// // computed. This field and `url` are mutually exclusive.
// .path = "foo",
// // When this is set to `true`, a package is declared to be lazily
// // fetched. This makes the dependency only get fetched if it is
// // actually used.
// .lazy = false,
//},
},
// Specifies the set of files and directories that are included in this package.
// Only files and directories listed here are included in the `hash` that
// is computed for this package. Only files listed here will remain on disk
// when using the zig package manager. As a rule of thumb, one should list
// files required for compilation plus any license(s).
// Paths are relative to the build root. Use the empty string (`""`) to refer to
// the build root itself.
// A directory listed here means that all files within, recursively, are included.
.paths = .{
"build.zig",
"build.zig.zon",
"src",
// For example...
//"LICENSE",
//"README.md",
},
}

15
zig/get_perf.sh Executable file
View File

@ -0,0 +1,15 @@
#!/bin/bash
# Build the program, profile it with `perf`, and open the resulting flame graph.
#
# Requires `zig`, `perf`, `stackcollapse-perf.pl`, `flamegraph.pl` and `firefox`
# on PATH.

# Abort on the first failing command (including inside pipelines) and on use
# of unset variables, so a failed build never profiles a stale binary.
set -euo pipefail

# Every path below is relative to this script's directory, so run from there
# regardless of where the script was invoked.
cd "$(dirname "${BASH_SOURCE[0]}")"

# -f: the artifacts do not exist on a first run; that is not an error.
rm -f all.svg perf.data out.perf out.folded

zig build -Doptimize=ReleaseSafe

# Record call graphs (-g) system-wide (-a) while the program runs; perf writes
# its samples to perf.data.
perf record -a -g -- ./zig-out/bin/zig

# Convert the recorded samples: perf.data -> text -> folded stacks -> SVG.
perf script > out.perf
stackcollapse-perf.pl out.perf > out.folded
flamegraph.pl out.folded > all.svg

firefox all.svg

175
zig/src/main.zig Normal file
View File

@ -0,0 +1,175 @@
const std = @import("std");
/// Writer for the process's standard output, used for all printing in this file.
const out = std.io.getStdOut().writer();
/// Number of worker threads `main` spawns; also the number of read buffers
/// and busy flags, since each worker gets one of each.
const WORKER_SIZE = 64;
/// Size in bytes of each worker's read buffer, i.e. the size of one file chunk.
const READ_SIZE = 8 * 1024;
/// Running statistics for one name (the text before `;` on an input line).
const InfoToKeepTrack = struct {
/// Smallest value seen so far.
min: f32,
/// Largest value seen so far.
max: f32,
/// Running SUM of all values seen so far, despite the name: values are added
/// to it as they arrive, and it is divided by `count` only when printed.
mean: f64,
/// Number of values folded into this entry.
count: usize,
};
/// String-keyed map from a name to a pointer to its running statistics.
const DataHash = std.StringArrayHashMap(*InfoToKeepTrack);
/// Parses a plain decimal number such as "12.3", "-4.5" or "7" into an `f32`.
///
/// Accepted form: an optional leading '-', ASCII digits, and at most one '.'
/// followed by more digits. No exponent, no '+', no whitespace. The input is
/// not validated: `num` must be non-empty and every byte other than the
/// leading '-' and the '.' must be an ASCII digit.
fn parseFloat(num: []const u8) f32 {
    const negative = num[0] == '-';
    const digits = if (negative) num[1..] else num;

    var nat: u32 = 0; // integer part
    var sub: u32 = 0; // fractional digits, read as an integer
    var sub_count: f32 = 1; // 10^(number of fractional digits)
    var seen_dot = false;

    for (digits) |c| {
        if (c == '.') {
            seen_dot = true;
            continue;
        }
        const digit: u32 = c - '0';
        if (seen_dot) {
            sub = sub * 10 + digit;
            sub_count *= 10;
        } else {
            nat = nat * 10 + digit;
        }
    }

    const magnitude = @as(f32, @floatFromInt(nat)) + @as(f32, @floatFromInt(sub)) / sub_count;
    // The sign applies to the whole number. Negating only the integer part
    // would turn "-1.5" into -1 + 0.5 = -0.5.
    return if (negative) -magnitude else magnitude;
}
/// Folds every `name;number` line of `buf` into `hashmap`.
///
/// `buf` is split on '\n'; empty lines are skipped. For each line the text
/// before the first ';' is the name and the text after it is parsed with
/// `parseFloat`. An existing entry has its min/max/sum/count updated; a new
/// name gets a fresh entry.
///
/// Ownership: for every new name, a copy of the name and an `InfoToKeepTrack`
/// are allocated with `alloc` and stored in the map. Whoever owns the map must
/// eventually free each key and destroy each value with the same allocator.
///
/// Panics on allocation failure or on a non-empty line without ';' (the
/// function returns `void`, so these cannot be reported as errors).
fn processLines(alloc: std.mem.Allocator, hashmap: *DataHash, buf: []u8) void {
    var lines = std.mem.splitScalar(u8, buf, '\n');
    while (lines.next()) |line| {
        if (line.len == 0) continue;

        const separator = std.mem.indexOfScalar(u8, line, ';') orelse
            @panic("malformed input line: missing ';' separator");
        const name = line[0..separator];
        const number = parseFloat(line[separator + 1 ..]);

        // A single lookup serves both the update and the insert path.
        const entry = hashmap.getOrPut(name) catch @panic("out of memory");
        if (entry.found_existing) {
            const info = entry.value_ptr.*;
            info.count += 1;
            info.max = @max(info.max, number);
            info.min = @min(info.min, number);
            info.mean += number;
        } else {
            const info = alloc.create(InfoToKeepTrack) catch @panic("out of memory");
            info.* = .{ .min = number, .max = number, .mean = number, .count = 1 };
            // The key stored by getOrPut still points into `buf`, which the
            // caller will overwrite; swap in a copy the map can keep.
            entry.key_ptr.* = alloc.dupe(u8, name) catch @panic("out of memory");
            entry.value_ptr.* = info;
        }
    }
}
/// Worker thread body: aggregates chunks handed over through `buf` into a
/// private map, then merges that map into `ghash` once told to finish.
///
/// Hand-off protocol, as used by this function:
/// - `working.*` set to true by another thread means `buf` holds a chunk to
///   process. This function sets it back to false when it is done with `buf`.
/// - `finish.*` set to true means no more chunks will arrive. The loop exits
///   once `finish.*` is true and `working.*` is false.
///
/// Only the part of `buf` before its last '\n' is processed; whatever follows
/// is an incomplete line and is ignored here.
///
/// Both flags are accessed atomically because they are shared between
/// threads. `ghash` is only touched while `ghash_mutex` is held. Entries moved
/// into `ghash` keep their `alloc`-allocated key and value; entries whose name
/// already exists in `ghash` are merged into it and then freed.
fn count_fn(buf: []u8, alloc: std.mem.Allocator, finish: *bool, ghash: *DataHash, ghash_mutex: *std.Thread.Mutex, working: *bool) void {
    var internal_hash_map = DataHash.init(alloc);
    defer internal_hash_map.deinit();

    while (true) {
        if (@atomicLoad(bool, working, .acquire)) {
            // Slicing up to the newline's index already excludes the newline.
            // Subtracting one more would cut the final digit off the last line.
            if (std.mem.lastIndexOfScalar(u8, buf, '\n')) |last_newline| {
                processLines(alloc, &internal_hash_map, buf[0..last_newline]);
            }
            @atomicStore(bool, working, false, .release);
        } else if (@atomicLoad(bool, finish, .acquire)) {
            break;
        } else {
            std.time.sleep(1);
        }
    }

    ghash_mutex.lock();
    defer ghash_mutex.unlock();

    for (internal_hash_map.keys(), internal_hash_map.values()) |k, iv| {
        if (ghash.get(k)) |v| {
            v.max = @max(v.max, iv.max);
            v.min = @min(v.min, iv.min);
            v.mean = v.mean + iv.mean;
            v.count += iv.count;
            // Merged into the existing entry: the local copies are no longer needed.
            alloc.destroy(iv);
            alloc.free(k);
        } else {
            // Ownership of both key and value moves to the global map.
            ghash.put(k, iv) catch @panic("out of memory");
        }
    }
}
/// Reads `../measurements.txt` in `READ_SIZE` chunks, distributes the chunks
/// round-robin over `WORKER_SIZE` worker threads, and prints
/// `'name': min/mean/max` for every name once all workers have merged their
/// results.
///
/// Each worker owns one buffer and one busy flag. The main thread fills a
/// buffer only while its flag is false, then sets the flag to hand the chunk
/// over. A chunk normally ends in a partial line, so after each full read the
/// file position is moved back to just after the chunk's last '\n'; the next
/// chunk then starts on a line boundary.
///
/// Returns any error from opening, reading or seeking the file, from spawning
/// threads, or from writing to stdout.
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{ .thread_safe = true }){};
    defer _ = gpa.deinit();
    const alloc = gpa.allocator();

    const file = try std.fs.cwd().openFile("../measurements.txt", .{});
    defer file.close();

    var buf: [WORKER_SIZE][READ_SIZE]u8 = undefined;
    var threads: [WORKER_SIZE]std.Thread = undefined;
    // threads_mux[i] is true while worker i owns buf[i].
    var threads_mux = [_]bool{false} ** WORKER_SIZE;
    var finish = false;

    var count = DataHash.init(alloc);
    defer count.deinit();
    var mutex = std.Thread.Mutex{};

    for (0..WORKER_SIZE) |i| {
        @memset(&buf[i], 0);
        threads[i] = try std.Thread.spawn(.{}, count_fn, .{ &buf[i], alloc, &finish, &count, &mutex, &threads_mux[i] });
    }

    var round: usize = 0;
    while (true) : (round = (round + 1) % WORKER_SIZE) {
        // Skip workers that are still busy with their previous chunk.
        if (@atomicLoad(bool, &threads_mux[round], .acquire)) continue;

        // readAll only returns fewer bytes than requested at end of file, so
        // a short count reliably identifies the last chunk.
        const read = try file.readAll(&buf[round]);
        if (read < READ_SIZE) {
            // Last chunk: small enough to process directly on this thread.
            mutex.lock();
            processLines(alloc, &count, buf[round][0..read]);
            mutex.unlock();
            break;
        }

        const last_newline = std.mem.lastIndexOfScalar(u8, &buf[round], '\n') orelse
            @panic("input line longer than READ_SIZE");
        const next_start = last_newline + 1;
        // Rewind over the trailing partial line so the next chunk begins with it.
        try file.seekTo(try file.getPos() - (READ_SIZE - next_start));

        // Hand the buffer over only after this thread is done looking at it.
        @atomicStore(bool, &threads_mux[round], true, .release);
    }

    // No flag is set to true past this point, so a flag that has been seen
    // false stays false; waiting on them one by one is sufficient.
    for (&threads_mux) |*busy| {
        while (@atomicLoad(bool, busy, .acquire)) std.time.sleep(1);
    }

    @atomicStore(bool, &finish, true, .release);
    try out.print("Joining\n", .{});
    for (threads) |thread| {
        thread.join();
    }
    try out.print("Done joining\n", .{});

    for (count.keys(), count.values()) |key, item| {
        // `mean` holds the running sum; turn it into the average here.
        try out.print("'{s}': {d:.2}/{d:.2}/{d:.2}\n", .{ key, item.min, item.mean / @as(f64, @floatFromInt(item.count)), item.max });
        alloc.destroy(item);
        alloc.free(key);
    }
}