From 8820f239b82f1755da5a85bca2fe1f0b7a0242e5 Mon Sep 17 00:00:00 2001 From: Andre Henriques Date: Sat, 24 Jun 2023 20:33:32 +0100 Subject: [PATCH] basic zig working --- src/main.zig | 487 ++++++++++++++++++++++++++++++++++++------------- src/walker.zig | 151 +++++++-------- 2 files changed, 443 insertions(+), 195 deletions(-) diff --git a/src/main.zig b/src/main.zig index 696058a..c8ca61a 100644 --- a/src/main.zig +++ b/src/main.zig @@ -80,6 +80,25 @@ fn HuffmanGraph(comptime valueType: type) type { } return d + 1; } + + fn valid(self: *NodeSelf) bool { + var has_child = self.right == null and self.left == null; + if (self.value != null) + return has_child; + + if (has_child) + return false; + + if (self.right) |r| + if (!r.valid()) + return false; + + if (self.left) |l| + if (!l.valid()) + return false; + + return true; + } }; root: *Node, @@ -100,14 +119,15 @@ fn HuffmanGraph(comptime valueType: type) type { } fn print(self: *Self) void { - for (0..(self.depth() + 1)) |i| { + var d = self.depth() + 1; + for (0..d) |i| { self.root.print(0, i); utils.printf("\n", .{}); } } - fn addValue(self: *Self, code: u64, size: u8, value: valueType) !void { - var walker = try BitWalkerUint(u64, true).init(code, size); + fn addValue(self: *Self, code: u64, size: usize, value: valueType) !void { + var walker = try BitWalkerUint(u64).init(code, size, true); var curNode: *Node = self.root; @@ -166,78 +186,206 @@ fn HuffmanGraph(comptime valueType: type) type { return error.InvalidBitSequence; } + fn nextBitW(self: *Self, bitw: *BitWalker) !valueType { + while (true) { + if (try self.iter(try bitw.bitWalk())) |value| + return value; + } + } + + fn valid(self: *Self) bool { + return self.root.valid(); + } + fn deinit(self: *Self) void { self.root.deinit(); } }; } -const DynamicDecoder = struct { - const Self = @This(); +fn create_dynamic_graph(walker: *BitWalker, len_to_read: usize, allocator: mem.Allocator) !HuffmanGraph(u64) { + const list: [19]u8 = .{ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 }; - len_to_read: usize, - codes: [19]u64, - walker: *BitWalker, - allocator: mem.Allocator, - graph: HuffmanGraph(u64), + const lens: []u8 = try allocator.alloc(u8, 19); + defer allocator.free(lens); + mem.set(u8, lens, 0); - fn init(walker: *BitWalker, len_to_read: usize, allocator: mem.Allocator) !Self { - const list: [19]u8 = .{ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 }; - var lenList: [19]u3 = .{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - - var bl_count: [7]u3 = .{ 0, 0, 0, 0, 0, 0, 0 }; - var max: u8 = 0; - - for (0..len_to_read) |i| { - var data: u3 = @intCast(u3, try walker.walk(3)); - lenList[i] = data; - if (data == 0) { - continue; - } - bl_count[data] += 1; - if (data > max) { - max = data; - } - } - - var next_code: [19]u64 = .{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - var code: u64 = 0; - for (1..(max + 1)) |bits| { - code = @shlExact(code + bl_count[bits - 1], 1); - next_code[bits] = code; - } - - var codes: [19]u64 = .{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - - for (0..19) |n| { - var len = lenList[n]; - if (len != 0) { - codes[n] = next_code[len]; - next_code[len] += 1; - } - } - - var graph = try HuffmanGraph(u64).init(allocator); - - for (0..19) |i| { - if (lenList[i] == 0) - continue; - try graph.addValue(codes[i], lenList[i], list[i]); - } - - return .{ - .len_to_read = len_to_read, - .codes = codes, - .walker = walker, - .allocator = allocator, - .graph = graph, - }; + for (0..len_to_read) |i| { + lens[list[i]] = @intCast(u8, try walker.walk(3)); } - fn deinit(self: *Self) void { - self.graph.deinit(); + var builder = try GraphBuilder(u64).init(allocator, 19, 0, 8); + defer builder.deinit(); + + while (!builder.done()) { + builder.addValue(builder.i, lens[builder.i]); } -}; + + return try builder.build(); +} + +fn GraphBuilder(comptime T: type) type { + return struct { + const Self = @This(); + + allocator: mem.Allocator, + values: []T, + lens: []usize, + bl_count: []usize, + size: usize, + max: usize = 0, + i: usize = 0, + + pub fn init(allocator: mem.Allocator, size: usize, default: T, maxlen: usize) !Self { + var values = try allocator.alloc(T, size); + mem.set(T, values, default); + + var lens = try allocator.alloc(usize, size); + mem.set(usize, lens, 0); + + var bl_count = try allocator.alloc(usize, maxlen); + mem.set(usize, bl_count, 0); + + return Self{ + .allocator = allocator, + .bl_count = bl_count, + .size = size, + .values = values, + .lens = lens, + }; + } + + pub fn skip(self: *Self, len: usize) void { + self.i += len; + } + + pub fn done(self: *Self) bool { + return self.i >= self.size; + } + + pub fn addValue(self: *Self, value: T, len: usize) void { + if (len > self.max) { + self.max = len; + } + self.values[self.i] = value; + self.lens[self.i] = len; + self.bl_count[len] += 1; + self.i += 1; + } + + pub fn build(self: *Self) !HuffmanGraph(T) { + self.bl_count[0] = 0; + + var next_code = try self.allocator.alloc(usize, self.max + 1); + defer self.allocator.free(next_code); + std.mem.set(usize, next_code, 0); + + var code: usize = 0; + for (1..(self.max + 1)) |bits| { + code = @shlExact(code + self.bl_count[bits - 1], 1); + next_code[bits] = code; + } + + var codes = try self.allocator.alloc(u64, self.size); + defer self.allocator.free(codes); + std.mem.set(usize, codes, 0); + + for (0..self.size) |n| { + var len = self.lens[n]; + if (len != 0) { + codes[n] = next_code[len]; + next_code[len] += 1; + } + } + + var graph = try HuffmanGraph(T).init(self.allocator); + errdefer graph.deinit(); + + for (0..self.size) |i| { + if (self.lens[i] == 0) + continue; + try graph.addValue(codes[i], self.lens[i], self.values[i]); + } + + if (!graph.valid()) + return error.graph_not_valid; + + return graph; + } + + pub fn buildWithDynamic(self: *Self, bitw: *BitWalker, graph: *HuffmanGraph(u64)) !HuffmanGraph(T) { + while (!self.done()) { + var v = try graph.nextBitW(bitw); + + if (v == 16) { + return error.not_implemented; + } else if (v == 17) { + self.skip(try bitw.walk(3) + 3); + } else if (v == 18) { + self.skip(try bitw.walk(7) + 11); + } else { + self.addValue(self.i, v); + } + } + + return try self.build(); + } + + pub fn deinit(self: *Self) void { + self.allocator.free(self.values); + self.allocator.free(self.lens); + self.allocator.free(self.bl_count); + } + }; +} + +fn get_len_value(bitw: *BitWalker, len_code: usize) !usize { + var extra_bits: usize = switch (len_code) { + 257...264 => 0, + 265...268 => 1, + 269...272 => 2, + 273...277 => 3, + 278...280 => 4, + 281...284 => 5, + 285 => 0, + else => unreachable, + }; + + var to_add = try bitw.walk(extra_bits); + + var base_value: usize = switch (len_code) { + 257...264 => 3 + len_code - 257, + 265...268 => 11 + (len_code - 265) * 2, + 269...272 => 19 + (len_code - 269) * 3, + 273...277 => 35 + (len_code - 273) * 7, + 278...280 => 67 + (len_code - 278) * 15, + 281...284 => 131 + (len_code - 281) * 31, + 285 => 0, + else => unreachable, + }; + + return base_value + to_add; +} + +fn get_dist_value(bitw: *BitWalker, dist_graph: *HuffmanGraph(u64)) !usize { + var dist = try dist_graph.nextBitW(bitw); + + var extra_bits: usize = switch (dist) { + 0...3 => 0, + 4...29 => (dist / 2) - 1, + else => unreachable, + }; + + var to_add = try bitw.walk(extra_bits); + + var base_value: usize = switch (dist) { + 0...3 => dist + 1, + 4...29 => std.math.pow(usize, 2, dist / 2) + (std.math.pow(usize, 2, (dist / 2) - 1) * (dist % 2)) + 1, + else => unreachable, + }; + + return base_value + to_add; +} const BlockData = struct { const Self = @This(); @@ -248,63 +396,157 @@ const BlockData = struct { last: bool, blockType: u8, - fn init(allocator: mem.Allocator, bitw: *BitWalker) !Self { + literal_graph: ?HuffmanGraph(u64) = null, + dist_graph: ?HuffmanGraph(u64) = null, + output: *[]u8, + + fn init(allocator: mem.Allocator, bitw: *BitWalker, output: *[]u8) !Self { return Self{ .bitw = bitw, .allocator = allocator, .last = try bitw.walk(1) == 1, - .blockType = try bitw.walk(2), + .blockType = @intCast(u8, try bitw.walk(2)), + .output = output, }; } - fn uncompress(self: *Self) !void { - if (self.blockType != 2) { - return error.unsuported_block_type; - } - - try self.dynamic_huffman(); + fn uncompress(self: *Self, start_place: usize) !usize { + return switch (self.blockType) { + 1 => blk: { + try self.fixed(); + break :blk 0; + }, + 2 => self.dynamic_huffman(start_place), + 3 => error.block_type_error, + else => unreachable, + }; } - fn dynamic_huffman(self: *Self) !void { + fn fixed(self: *Self) !void { + var litBuilder = try GraphBuilder(u64).init(self.allocator, 287, 0, 10); + defer litBuilder.deinit(); + + for (0..144) |i| { + //try lit.addValue(0b00110000 + i, 8, i); + litBuilder.addValue(i, 8); + } + for (144..256) |i| { + //try lit.addValue(0b001100000 + i, 9, i); + litBuilder.addValue(i, 9); + } + for (256..280) |i| { + //try lit.addValue(0b0000000 + i, 7, i); + litBuilder.addValue(i, 7); + } + for (280..287) |i| { + litBuilder.addValue(i, 8); + //try lit.addValue(0b11000000 + i, 8, i); + } + + var lit = try litBuilder.build(); + + self.literal_graph = lit; + + lit.print(); + + // var dist = try HuffmanGraph(u64).init(self.allocator); + + // for (0..144) |i| { + // try lit.addValue(0b00110000 + i, 8, i); + // } + // for (144..256) |i| { + // try lit.addValue(0b001100000 + i, 9, i); + // } + // for (256..280) |i| { + // try lit.addValue(0b0000000 + i, 7, i); + // } + // for (280..287) |i| { + // try lit.addValue(0b11000000 + i, 8, i); + // } + + // self.literal_graph = lit; + + //TODO dist + var bitw = self.bitw; - var number_of_literal_codes: u32 = @as(u32, try bitw.walk(5)) + 257; - var number_of_dist_codes = try bitw.walk(5) + 1; - var number_of_length_codes = try bitw.walk(4) + 4; - - print("number of literal codes: {}", .{number_of_literal_codes}); - print("number of dist codes: {}", .{number_of_dist_codes}); - print("number_of_length_codes: {}", .{number_of_length_codes}); - - var dynamic_decoder = try DynamicDecoder.init(bitw, number_of_length_codes, self.allocator); - defer dynamic_decoder.deinit(); - - dynamic_decoder.graph.print(); - - var code_len: usize = 0; - while (code_len < number_of_literal_codes) { - var decode_value = try dynamic_decoder.graph.iter(try bitw.bitWalk()); - while (decode_value == null) { - decode_value = try dynamic_decoder.graph.iter(try bitw.bitWalk()); - } - - print("Test {any}", .{decode_value}); - - if (decode_value.? == 16) { + while (true) { + var lastRead = try lit.nextBitW(bitw); + if (lastRead == 256) { + break; + } else if (lastRead > 256) { + utils.printf("❓({})", .{lastRead}); + //var dist = try self.dist_graph.?.nextBitW(bitw); + //utils.printf("<{}>", .{dist}); return error.not_implemented; - } else if (decode_value.? == 17) { - code_len += try bitw.walk(3); - } else if (decode_value.? == 18) { - code_len += try bitw.walk(7); + } else if (lastRead < 256) { + utils.printf("{c}", .{@intCast(u8, lastRead)}); } else { - code_len += 1; + unreachable; } } return error.todo; } - fn read_len_code() void {} + fn dynamic_huffman(self: *Self, start_place: usize) !usize { + var bitw = self.bitw; + + var number_of_literal_codes: u32 = @intCast(u32, try bitw.walk(5)) + 257; + var number_of_dist_codes = try bitw.walk(5) + 1; + var number_of_length_codes = try bitw.walk(4) + 4; + + var dynamic_graph = try create_dynamic_graph(bitw, number_of_length_codes, self.allocator); + defer dynamic_graph.deinit(); + + var builder = try GraphBuilder(u64).init(self.allocator, number_of_literal_codes, 0, 16); + // destory either the 1st or the 2nd graph + defer builder.deinit(); + + self.literal_graph = try builder.buildWithDynamic(bitw, &dynamic_graph); + var graph = self.literal_graph.?; + + // Destory the first builder + builder.deinit(); + + builder = try GraphBuilder(u64).init(self.allocator, number_of_dist_codes, 0, 16); + self.dist_graph = try builder.buildWithDynamic(bitw, &dynamic_graph); + + var lastRead: u64 = 0; + + var i: usize = start_place; + while (true) { + lastRead = try graph.nextBitW(bitw); + if (lastRead == 256) { + break; + } else if (lastRead > 256) { + var len = try get_len_value(bitw, lastRead); + var dist = try get_dist_value(bitw, &self.dist_graph.?); + + var pos: usize = i - dist; + for (0..len) |j| { + self.output.ptr[i] = self.output.ptr[pos + j]; + i += 1; + } + } else if (lastRead < 256) { + self.output.ptr[i] = @intCast(u8, lastRead); + i += 1; + } else { + unreachable; + } + } + + return i; + } + + fn deinit(self: *Self) void { + if (self.literal_graph != null) { + self.literal_graph.?.deinit(); + } + if (self.dist_graph != null) { + self.dist_graph.?.deinit(); + } + } }; const LOCAL_FILE_HEADER_SIGNATURE = 0x04034b50; @@ -366,10 +608,10 @@ const ZipFileHeader = struct { } fn extract(self: *Self) !void { - if (self.decompressed) { - return error.AlreadyDecompressed; - } + // already decompressed + if (self.decompressed) return; + // already decompressed if (self.compression_method == 0) { return error.uncompressed_file; } @@ -380,19 +622,22 @@ const ZipFileHeader = struct { self.uncompressed_content = try self.allocator.alloc(u8, self.uncompressed_size); errdefer self.allocator.free(self.uncompressed_content); + mem.set(u8, self.uncompressed_content, 0); - var bitw = BitWalker.init(&self.compressed_content, false); + var bitw = try BitWalker.init(&self.compressed_content); - var block = try BlockData.init(self.allocator, &bitw); + var is_last = false; + var output_place: usize = 0; - if (block.last) { - print("last block", .{}); - } else { - print("not last block", .{}); + while (!is_last) { + var block = try BlockData.init(self.allocator, &bitw, &self.uncompressed_content); + defer block.deinit(); + + is_last = block.last; + + output_place = try block.uncompress(output_place); } - try block.uncompress(); - self.decompressed = true; } @@ -460,14 +705,12 @@ pub fn main() !void { exit("Invalid file provided", .{}, 1); } - print("H: {}", .{first_file.compression_method}); + while (true) { + var second_file = try ZipFileHeader.init(allocator, reader); + defer second_file.deinit(); - var second_file = try ZipFileHeader.init(allocator, reader); - defer second_file.deinit(); - - try second_file.extract(); - - print("G: {s}", .{second_file.file_name}); - print("GI: {}", .{second_file.compression_method}); - print("xml stuff:\n{s}", .{second_file.compressed_content}); + try second_file.extract(); + print("G: {s}", .{second_file.file_name}); + print("xml stuff:\n{s}", .{second_file.uncompressed_content}); + } } diff --git a/src/walker.zig b/src/walker.zig index 2eb04b0..b9a19f2 100644 --- a/src/walker.zig +++ b/src/walker.zig @@ -1,7 +1,7 @@ const std = @import("std"); const utils = @import("utils.zig"); -pub fn BitWalkerUint(comptime T: anytype, comptime reverse: bool) type { +pub fn BitWalkerUint(comptime T: anytype) type { const typeInfo = @typeInfo(T); if (typeInfo != .Int) { @@ -18,15 +18,16 @@ pub fn BitWalkerUint(comptime T: anytype, comptime reverse: bool) type { // TODO this is probably wrong in_byte_position: i16, - size: u8, + size: usize, + reverse: bool, const Self = @This(); - pub fn init(value: T, size: u8) !Self { + pub fn init(value: T, size: usize, reverse: bool) !Self { if (typeInfo.Int.bits < size) return error.invlaid_size; - var start_value: u8 = 0; + var start_value: usize = 0; if (reverse) { start_value = size - 1; } @@ -39,9 +40,10 @@ pub fn BitWalkerUint(comptime T: anytype, comptime reverse: bool) type { return Self{ .value = value, - .in_byte_position = start_value, + .in_byte_position = @intCast(i16, start_value), .size = size, .mask = mask, + .reverse = reverse, }; } @@ -51,7 +53,9 @@ pub fn BitWalkerUint(comptime T: anytype, comptime reverse: bool) type { var result = (self.value & self.mask) == self.mask; - if (reverse) { + //utils.print("walkbit: {b:08} & {b:08} -> {b:08} ({})", .{ self.value, self.mask, self.value & self.mask, result }); + + if (self.reverse) { self.in_byte_position -= 1; if (self.mask == 1) { self.mask = 0; @@ -60,11 +64,10 @@ pub fn BitWalkerUint(comptime T: anytype, comptime reverse: bool) type { } } else { self.in_byte_position += 1; - self.mask = @shlExact(self.mask, 1); - if (self.in_byte_position > self.size) { + if (self.in_byte_position >= self.size) { self.mask = 0; } else { - self.mask = @shrExact(self.mask, 1); + self.mask = @shlExact(self.mask, 1); } } @@ -77,77 +80,79 @@ pub const BitWalker = struct { const Self = @This(); data: *[]u8, - position: usize = 0, - in_byte_position: u3 = 0, - direction: bool = false, - pub fn init(data: *[]u8, direction: bool) Self { + // True == most Significant + // False == least Significant + direction: bool = false, + unitWaker: BitWalkerUint(u8), + position: usize, + + pub fn init(data: *[]u8) !Self { return Self{ .data = data, - .direction = direction, + .position = 0, + .unitWaker = try BitWalkerUint(u8).init(data.ptr[0], 8, false), }; } + pub fn change_direction(self: *Self) !void { + self.direction = !self.direction; + if (self.unitWaker.mask == 0) + self.position += 1; + // TODO probs wrong when mask != 0 + self.unitWaker = try BitWalkerUint(u8).init(self.data.ptr[self.position], 8, self.direction); + } + + pub fn smart_change_direction(self: *Self) !void { + self.direction = !self.direction; + if (self.unitWaker.mask == 0) { + self.position += 1; + self.unitWaker = try BitWalkerUint(u8).init(self.data.ptr[self.position], 8, self.direction); + return; + } + if (!self.direction) { + self.unitWaker = try BitWalkerUint(u8).init(self.data.ptr[self.position], 8, self.direction); + } else { + self.unitWaker = try BitWalkerUint(u8).init(self.data.ptr[self.position], 8, self.direction); + } + } + + pub fn nextByte(self: *Self) !void { + self.position += 1; + self.unitWaker = try BitWalkerUint(u8).init(self.data.ptr[self.position], 8, self.direction); + } + + pub fn status(self: *Self) void { + utils.print("p: {}, in: {} ({b:08})", .{ self.position, self.unitWaker.in_byte_position, self.unitWaker.mask }); + } + + pub fn walk(self: *Self, bits: usize) !u64 { + var number: u64 = 0; + + //utils.print("walk: {}", .{bits}); + + var i: usize = 1; + while (i <= bits) : (i += 1) { + //utils.print("p: {}, in_p: {}", .{ self.position, self.unitWaker.in_byte_position }); + if (self.unitWaker.walkBit()) |next_number| { + if (self.direction) { + number = @shlExact(number, 1); + number += next_number; + } else { + number += @shlExact(@as(u64, next_number), @intCast(u6, i - 1)); + } + } else { + try self.nextByte(); + i -= 1; + } + } + + //utils.print("result: {} ({b:08})", .{ number, number }); + + return number; + } + pub fn bitWalk(self: *Self) !u1 { return @intCast(u1, try self.walk(1)); } - - // TODO direction - pub fn walk(self: *Self, bits: u3) !u8 { - if (bits > 8 or bits == 0) return error.invalid_bit_number; - - var byte = self.data.ptr[self.position]; - - // jumps over bytes - if (self.in_byte_position + @as(u4, bits) > 8) { - // Generate a mast that covers the last part of the old byte - var old_mask: u8 = 0; - var i: usize = 0; - while (i < 8 - @as(u4, self.in_byte_position)) : (i += 1) { - old_mask = @shlExact(old_mask, 1) + 1; - } - old_mask = @shlExact(old_mask, self.in_byte_position); - - var next_byte = self.data.ptr[self.position + 1]; - var new_byte_pos: u3 = @intCast(u3, @as(u4, bits) - (8 - @as(u4, self.in_byte_position))); - - var new_mask: u8 = 0; - var j: usize = 0; - while (j < new_byte_pos) : (j += 1) { - new_mask = @shlExact(new_mask, 1) + 1; - } - - var result = @shrExact(byte & old_mask, self.in_byte_position) + @shlExact(next_byte & new_mask, @intCast(u3, 8 - @as(u4, self.in_byte_position))); - - //print("mask: {b}, new_mask: {b}", .{ old_mask, new_mask }); - //print("here {b} {b}", .{ byte, old_mask }); - //print("here_new {b} {b}", .{ next_byte, new_mask }); - //print("result {}", .{result}); - - self.position += 1; - self.in_byte_position = new_byte_pos; - - return result; - } - - // Generate a mast that covers the last part of the old byte - var old_mask: u8 = 0; - var i: usize = 0; - while (i < bits) : (i += 1) { - old_mask = @shlExact(old_mask, 1) + 1; - } - old_mask = @shlExact(old_mask, self.in_byte_position); - - const result = @shrExact(byte & old_mask, self.in_byte_position); - - const sum = @intCast(u4, self.in_byte_position) + @intCast(u4, bits); - if (sum == 8) { - self.position += 1; - self.in_byte_position = 0; - } else { - self.in_byte_position += bits; - } - - return result; - } };