From e099a7fdda586c1035e9f8e0630c96ee521be069 Mon Sep 17 00:00:00 2001 From: Andre Henriques Date: Wed, 14 Jun 2023 20:58:56 +0100 Subject: [PATCH] more work on the huffman --- src/main.zig | 169 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 145 insertions(+), 24 deletions(-) diff --git a/src/main.zig b/src/main.zig index 786c491..95f3876 100644 --- a/src/main.zig +++ b/src/main.zig @@ -21,10 +21,9 @@ fn usage() void { exit("", .{}, 1); } -fn BitWalkerUint(T: anytype) type { - +fn BitWalkerUint(T: anytype, size: usize, reverse: bool) type { var typeInfo = @typeInfo(T); - + if (typeInfo == std.builtin.Type.Int) { @compileError("This needs to be a int"); } @@ -33,21 +32,51 @@ fn BitWalkerUint(T: anytype) type { @compileError("The integer needs to be unsigned"); } - var size = typeInfo.Int.bits; + if (typeInfo.Int.bits < size) { + @compileError("Int size needs to be the same or grater than the size"); + } return struct { - t: T, + value: T, // TODO this is probably wrong in_byte_position: u8, const Self = @This(); fn init(value: T) Self { - return Self { + var start_value = 0; + if (reverse) { + start_value = size; + } + return Self{ .t = value, - .in_byte_position = 0, + .in_byte_position = size, }; } + + fn walkBit(self: *Self) ?u1 { + if (reverse) { + if (self.in_byte_position == 0) + return null; + } else { + if (self.in_byte_position > size - 1) + return null; + } + + var mask = 1; + + for (0..self.in_byte_position) |_| { + mask = @shlExact(mask, 1); + } + + if (reverse) { + self.in_byte_position -= 1; + } else { + self.in_byte_position += 1; + } + + return self.value & mask; + } }; } @@ -126,7 +155,7 @@ const BitWalker = struct { } }; -fn HuffmanGraph(valueType: type) type { +fn HuffmanGraph(comptime valueType: type) type { return struct { const Node = struct { left: ?Node, @@ -136,7 +165,6 @@ fn HuffmanGraph(valueType: type) type { allocator: mem.Allocator, fn init(allocator: mem.Allocator) !*Node { - var node = try allocator.create(Node); node.left = null; node.right = null; @@ -155,19 +183,96 @@ fn HuffmanGraph(valueType: type) type { } self.allocator.destory(self); } + + fn depth(self: *Node) usize { + var d = 0; + if (self.right) |r| { + d = r.depth(); + } + if (self.left) |l| { + d = std.math.max(d, l.depth()); + } + return d + 1; + } }; - root: Node, + root: *Node, + allocator: mem.Allocator, + stored_depth: ?usize = null, + + iterNode: *Node, const Self = @This(); fn init(allocator: mem.Allocator) !Self { var root = try Node.init(allocator); - return Self { - root, + return Self{ + .root = root, + .iterNode = root, + .allocator = allocator, }; } + fn addValue(self: *Self, code: u64, size: usize, value: valueType) !void { + var walker = BitWalkerUint(u64, size, true).init(code); + + var curNode: *Node = self.node; + + while (walker.walkBit()) |bit| { + if (bit == 1) { + if (curNode.left) |nextNode| { + curNode = nextNode; + } else { + curNode.left = try Node.init(self.allocator); + curNode = curNode.left.?; + } + } else { + if (curNode.right) |nextNode| { + curNode = nextNode; + } else { + curNode.right = try Node.init(self.allocator); + curNode = curNode.right.?; + } + } + } + + curNode.value = value; + self.stored_depth = null; + } + + fn depth(self: *Self) usize { + if (self.stored_depth) |d| { + return d; + } + + self.stored_depth = self.root.depth() - 1; + return self.stored_depth.?; + } + + fn iter(self: *Self, bit: u1) anyerror!?valueType { + var node = self.iterNode; + + var nextNode: ?*Node = null; + + if (bit == 1) { + nextNode = node.left; + } else { + nextNode = node.right; + } + + if (nextNode) |new_node| { + if (new_node.value) |value| { + self.iterNode = self.root; + return value; + } else { + self.iterNode = new_node; + return null; + } + } + + return error.InvalidBitSequence; + } + fn deinit(self: *Self) void { self.root.deinit(); } @@ -178,15 +283,17 @@ const DynamicDecoder = struct { const Self = @This(); len_to_read: usize, - codes: [19]u3, + codes: [19]u64, walker: *BitWalker, allocator: mem.Allocator, + graph: HuffmanGraph(u64), fn init(walker: *BitWalker, len_to_read: usize, allocator: mem.Allocator) !Self { const list: [19]u8 = .{ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 }; var lenList: [19]u3 = .{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - var bl_count: [7]u8 = .{0, 0, 0, 0, 0, 0, 0}; + var bl_count: [7]u3 = .{ 0, 0, 0, 0, 0, 0, 0 }; + var max: u8 = 0; for (0..(len_to_read + 4)) |i| { var data: u3 = @intCast(u3, try walker.walk(3)); @@ -195,36 +302,50 @@ const DynamicDecoder = struct { continue; } bl_count[data - 1] += 1; + if (data > max) { + max = data; + } } - var bl_count: []u8 = allocator.allocator(u8, max - 1); + var next_code: [19]u64 = .{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - var next_codes: [19]u3 = .{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - - var code = 0; + var code: u64 = 0; bl_count[0] = 0; for (1..max) |bits| { - code = (code + bl_count[bits-1]) << 1; + code = @shlExact(code + bl_count[bits - 1], 1); next_code[bits] = code; - } + } + + var codes: [19]u64 = .{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - var codes: [19]u3 = .{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - for (0..19) |n| { var len = lenList[n]; if (len != 0) { - codes = next_code[len]; + codes[n] = next_code[len]; next_code[len] += 1; } } + var graph = try HuffmanGraph(u64).init(allocator); + + for (0..19) |i| { + if (lenList[i] == 0) + continue; + try graph.addValue(codes[i], lenList[i], list[i]); + } + return .{ .len_to_read = len_to_read, .codes = codes, .walker = walker, .allocator = allocator, + .graph = graph, }; } + + fn deinit(self: *Self) void { + self.graph.deinit(); + } }; const LOCAL_FILE_HEADER_SIGNATURE = 0x04034b50; @@ -326,7 +447,7 @@ const ZipFileHeader = struct { print("number of dist codes: {}", .{number_of_dist_codes}); print("number_of_length_codes: {}", .{number_of_length_codes}); - var dynamic_decoder = try DynamicDecoder.init(&bitw, number_of_length_codes); + var dynamic_decoder = try DynamicDecoder.init(&bitw, number_of_length_codes, self.allocator); _ = dynamic_decoder;