From a80e954aa3cca37c67d7e98fcbc2a45b8f83042b Mon Sep 17 00:00:00 2001 From: Andre Henriques Date: Fri, 23 Jun 2023 21:38:02 +0100 Subject: [PATCH] chore: updated the huffman stuff --- src/main.zig | 305 +++++++++++++++++++++------------------------------ 1 file changed, 124 insertions(+), 181 deletions(-) diff --git a/src/main.zig b/src/main.zig index 95f3876..3e0d8b2 100644 --- a/src/main.zig +++ b/src/main.zig @@ -3,169 +3,33 @@ const mem = std.mem; const stdout = std.io.getStdOut().writer(); const stderr = std.io.getStdErr().writer(); -fn print(comptime str: []const u8, params: anytype) void { - stdout.print(str ++ "\n", params) catch {}; -} +const utils = @import("utils.zig"); +const print = utils.print; +const exit = utils.exit; +const pErr = utils.pErr; -fn pErr(comptime str: []const u8, params: anytype) void { - stderr.print(str ++ "\n", params) catch {}; -} - -fn exit(comptime str: []const u8, params: anytype, exitCode: u8) void { - pErr(str, params); - std.os.exit(exitCode); -} +const walkers = @import("walker.zig"); +const BitWalker = walkers.BitWalker; +const BitWalkerUint = walkers.BitWalkerUint; fn usage() void { pErr("reader ", .{}); exit("", .{}, 1); } -fn BitWalkerUint(T: anytype, size: usize, reverse: bool) type { - var typeInfo = @typeInfo(T); - - if (typeInfo == std.builtin.Type.Int) { - @compileError("This needs to be a int"); - } - - if (typeInfo.Int.is_signed) { - @compileError("The integer needs to be unsigned"); - } - - if (typeInfo.Int.bits < size) { - @compileError("Int size needs to be the same or grater than the size"); - } - - return struct { - value: T, - // TODO this is probably wrong - in_byte_position: u8, - - const Self = @This(); - - fn init(value: T) Self { - var start_value = 0; - if (reverse) { - start_value = size; - } - return Self{ - .t = value, - .in_byte_position = size, - }; - } - - fn walkBit(self: *Self) ?u1 { - if (reverse) { - if (self.in_byte_position == 0) - return null; - } else { - if (self.in_byte_position > size - 1) - return null; - } - - var mask = 1; - - for (0..self.in_byte_position) |_| { - mask = @shlExact(mask, 1); - } - - if (reverse) { - self.in_byte_position -= 1; - } else { - self.in_byte_position += 1; - } - - return self.value & mask; - } - }; -} - -const BitWalker = struct { - const Self = @This(); - - data: *[]u8, - position: usize = 0, - in_byte_position: u3 = 0, - direction: bool = false, - - fn init(data: *[]u8, direction: bool) Self { - return Self{ - .data = data, - .direction = direction, - }; - } - - // TODO direction - fn walk(self: *Self, bits: u3) !u8 { - if (bits > 8 or bits == 0) return error.invalid_bit_number; - - var byte = self.data.ptr[self.position]; - - // jumps over bytes - if (self.in_byte_position + @as(u4, bits) > 8) { - // Generate a mast that covers the last part of the old byte - var old_mask: u8 = 0; - var i: usize = 0; - while (i < 8 - @as(u4, self.in_byte_position)) : (i += 1) { - old_mask = @shlExact(old_mask, 1) + 1; - } - old_mask = @shlExact(old_mask, self.in_byte_position); - - var next_byte = self.data.ptr[self.position + 1]; - var new_byte_pos: u3 = @intCast(u3, @as(u4, bits) - (8 - @as(u4, self.in_byte_position))); - - var new_mask: u8 = 0; - var j: usize = 0; - while (j < new_byte_pos) : (j += 1) { - new_mask = @shlExact(new_mask, 1) + 1; - } - - var result = @shrExact(byte & old_mask, self.in_byte_position) + @shlExact(next_byte & new_mask, @intCast(u3, 8 - @as(u4, self.in_byte_position))); - - //print("mask: {b}, new_mask: {b}", .{ old_mask, new_mask }); - //print("here {b} {b}", .{ byte, old_mask }); - //print("here_new {b} {b}", .{ next_byte, new_mask }); - //print("result {}", .{result}); - - self.position += 1; - self.in_byte_position = new_byte_pos; - - return result; - } - - // Generate a mast that covers the last part of the old byte - var old_mask: u8 = 0; - var i: usize = 0; - while (i < bits) : (i += 1) { - old_mask = @shlExact(old_mask, 1) + 1; - } - old_mask = @shlExact(old_mask, self.in_byte_position); - - const result = @shrExact(byte & old_mask, self.in_byte_position); - - const sum = @intCast(u4, self.in_byte_position) + @intCast(u4, bits); - if (sum == 8) { - self.position += 1; - self.in_byte_position = 0; - } else { - self.in_byte_position += bits; - } - - return result; - } -}; - fn HuffmanGraph(comptime valueType: type) type { return struct { const Node = struct { - left: ?Node, - right: ?Node, + const NodeSelf = @This(); + + left: ?*NodeSelf, + right: ?*NodeSelf, value: ?valueType, allocator: mem.Allocator, - fn init(allocator: mem.Allocator) !*Node { - var node = try allocator.create(Node); + fn init(allocator: mem.Allocator) !*NodeSelf { + var node = try allocator.create(NodeSelf); node.left = null; node.right = null; node.value = null; @@ -174,18 +38,40 @@ fn HuffmanGraph(comptime valueType: type) type { return node; } - fn deinit(self: *Node) void { + fn deinit(self: *NodeSelf) void { if (self.left) |left| { left.deinit(); } if (self.right) |right| { right.deinit(); } - self.allocator.destory(self); + self.allocator.destroy(self); } - fn depth(self: *Node) usize { - var d = 0; + fn print(self: *NodeSelf, curDepth: usize, targetDepth: usize) void { + if (curDepth != targetDepth) { + if (self.left) |l| { + l.print(curDepth + 1, targetDepth); + } else { + utils.printf(" , ", .{}); + } + if (self.right) |r| { + r.print(curDepth + 1, targetDepth); + } else { + utils.printf(" . ", .{}); + } + return; + } + + if (self.value) |v| { + utils.printf(" {any} ", .{v}); + } else { + utils.printf(" _ ", .{}); + } + } + + fn depth(self: *NodeSelf) usize { + var d: usize = 0; if (self.right) |r| { d = r.depth(); } @@ -213,10 +99,17 @@ fn HuffmanGraph(comptime valueType: type) type { }; } - fn addValue(self: *Self, code: u64, size: usize, value: valueType) !void { - var walker = BitWalkerUint(u64, size, true).init(code); + fn print(self: *Self) void { + for (0..(self.depth() + 1)) |i| { + self.root.print(0, i); + utils.printf("\n", .{}); + } + } - var curNode: *Node = self.node; + fn addValue(self: *Self, code: u64, size: u8, value: valueType) !void { + var walker = try BitWalkerUint(u64, true).init(code, size); + + var curNode: *Node = self.root; while (walker.walkBit()) |bit| { if (bit == 1) { @@ -295,23 +188,21 @@ const DynamicDecoder = struct { var bl_count: [7]u3 = .{ 0, 0, 0, 0, 0, 0, 0 }; var max: u8 = 0; - for (0..(len_to_read + 4)) |i| { + for (0..len_to_read) |i| { var data: u3 = @intCast(u3, try walker.walk(3)); lenList[i] = data; if (data == 0) { continue; } - bl_count[data - 1] += 1; + bl_count[data] += 1; if (data > max) { max = data; } } var next_code: [19]u64 = .{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - var code: u64 = 0; - bl_count[0] = 0; - for (1..max) |bits| { + for (1..(max + 1)) |bits| { code = @shlExact(code + bl_count[bits - 1], 1); next_code[bits] = code; } @@ -348,6 +239,75 @@ const DynamicDecoder = struct { } }; +const BlockData = struct { + const Self = @This(); + + bitw: *BitWalker, + allocator: mem.Allocator, + + last: bool, + blockType: u8, + + fn init(allocator: mem.Allocator, bitw: *BitWalker) !Self { + return Self{ + .bitw = bitw, + .allocator = allocator, + .last = try bitw.walk(1) == 1, + .blockType = try bitw.walk(2), + }; + } + + fn uncompress(self: *Self) !void { + if (self.blockType != 2) { + return error.unsuported_block_type; + } + + try self.dynamic_huffman; + + } + + fn dynamic_huffman() !void { + var bitw = self.bitw; + + var number_of_literal_codes: u32 = @as(u32, try bitw.walk(5)) + 257; + var number_of_dist_codes = try bitw.walk(5) + 1; + var number_of_length_codes = try bitw.walk(4) + 4; + + print("number of literal codes: {}", .{number_of_literal_codes}); + print("number of dist codes: {}", .{number_of_dist_codes}); + print("number_of_length_codes: {}", .{number_of_length_codes}); + + var dynamic_decoder = try DynamicDecoder.init(bitw, number_of_length_codes, self.allocator); + defer dynamic_decoder.deinit(); + + dynamic_decoder.graph.print(); + + var code_len: usize = 0; + while (code_len < number_of_literal_codes) { + var decode_value = try dynamic_decoder.graph.iter(try bitw.bitWalk()); + while (decode_value == null) { + decode_value = try dynamic_decoder.graph.iter(try bitw.bitWalk()); + } + + print("Test {any}", .{decode_value}); + + if (decode_value.? == 16) { + return error.not_implemented; + } else if (decode_value.? == 17) { + code_len += try bitw.walk(3); + } else if (decode_value.? == 18) { + code_len += try bitw.walk(7); + } else { + code_len += 1; + } + } + + return error.todo; + } + + fn read_len_code() void {} +}; + const LOCAL_FILE_HEADER_SIGNATURE = 0x04034b50; const ZipFileHeader = struct { version: u16, @@ -424,32 +384,15 @@ const ZipFileHeader = struct { var bitw = BitWalker.init(&self.compressed_content, false); - var lastBlock = try bitw.walk(1) == 1; - var blockType = try bitw.walk(2); + var block = try BlockData.init(self.allocator, &bitw); - if (lastBlock) { + if (block.last) { print("last block", .{}); } else { print("not last block", .{}); } - print("block_type: {}", .{blockType}); - - if (blockType != 2) { - return error.unsuported_block_type; - } - - var number_of_literal_codes = try bitw.walk(5); - var number_of_dist_codes = try bitw.walk(5); - var number_of_length_codes = try bitw.walk(4); - - print("number of literal codes: {}", .{number_of_literal_codes}); - print("number of dist codes: {}", .{number_of_dist_codes}); - print("number_of_length_codes: {}", .{number_of_length_codes}); - - var dynamic_decoder = try DynamicDecoder.init(&bitw, number_of_length_codes, self.allocator); - - _ = dynamic_decoder; + try block.uncompress(); self.decompressed = true; }