From 54f6673a0346712f32f8f5e2169766de81c9eec8 Mon Sep 17 00:00:00 2001 From: Andre Henriques Date: Sat, 24 Jun 2023 23:15:22 +0100 Subject: [PATCH] some part of zip still broken --- src/main.zig | 657 ++------------------------------------------------- 1 file changed, 18 insertions(+), 639 deletions(-) diff --git a/src/main.zig b/src/main.zig index c8ca61a..9f3cf48 100644 --- a/src/main.zig +++ b/src/main.zig @@ -12,645 +12,19 @@ const walkers = @import("walker.zig"); const BitWalker = walkers.BitWalker; const BitWalkerUint = walkers.BitWalkerUint; +const huffmanstuff = @import("HuffmanGraph.zig"); +const HuffmanGraph = huffmanstuff.HuffmanGraph; +const GraphBuilder = huffmanstuff.GraphBuilder; +const create_dynamic_graph = huffmanstuff.create_dynamic_graph; + +const zipstuff = @import("./zip.zig"); +const ZipFile = zipstuff.ZipFile; + fn usage() void { pErr("reader ", .{}); exit("", .{}, 1); } -fn HuffmanGraph(comptime valueType: type) type { - return struct { - const Node = struct { - const NodeSelf = @This(); - - left: ?*NodeSelf, - right: ?*NodeSelf, - value: ?valueType, - - allocator: mem.Allocator, - - fn init(allocator: mem.Allocator) !*NodeSelf { - var node = try allocator.create(NodeSelf); - node.left = null; - node.right = null; - node.value = null; - node.allocator = allocator; - - return node; - } - - fn deinit(self: *NodeSelf) void { - if (self.left) |left| { - left.deinit(); - } - if (self.right) |right| { - right.deinit(); - } - self.allocator.destroy(self); - } - - fn print(self: *NodeSelf, curDepth: usize, targetDepth: usize) void { - if (curDepth != targetDepth) { - if (self.left) |l| { - l.print(curDepth + 1, targetDepth); - } else { - utils.printf(" , ", .{}); - } - if (self.right) |r| { - r.print(curDepth + 1, targetDepth); - } else { - utils.printf(" . ", .{}); - } - return; - } - - if (self.value) |v| { - utils.printf(" {any} ", .{v}); - } else { - utils.printf(" _ ", .{}); - } - } - - fn depth(self: *NodeSelf) usize { - var d: usize = 0; - if (self.right) |r| { - d = r.depth(); - } - if (self.left) |l| { - d = std.math.max(d, l.depth()); - } - return d + 1; - } - - fn valid(self: *NodeSelf) bool { - var has_child = self.right == null and self.left == null; - if (self.value != null) - return has_child; - - if (has_child) - return false; - - if (self.right) |r| - if (!r.valid()) - return false; - - if (self.left) |l| - if (!l.valid()) - return false; - - return true; - } - }; - - root: *Node, - allocator: mem.Allocator, - stored_depth: ?usize = null, - - iterNode: *Node, - - const Self = @This(); - - fn init(allocator: mem.Allocator) !Self { - var root = try Node.init(allocator); - return Self{ - .root = root, - .iterNode = root, - .allocator = allocator, - }; - } - - fn print(self: *Self) void { - var d = self.depth() + 1; - for (0..d) |i| { - self.root.print(0, i); - utils.printf("\n", .{}); - } - } - - fn addValue(self: *Self, code: u64, size: usize, value: valueType) !void { - var walker = try BitWalkerUint(u64).init(code, size, true); - - var curNode: *Node = self.root; - - while (walker.walkBit()) |bit| { - if (bit == 1) { - if (curNode.left) |nextNode| { - curNode = nextNode; - } else { - curNode.left = try Node.init(self.allocator); - curNode = curNode.left.?; - } - } else { - if (curNode.right) |nextNode| { - curNode = nextNode; - } else { - curNode.right = try Node.init(self.allocator); - curNode = curNode.right.?; - } - } - } - - curNode.value = value; - self.stored_depth = null; - } - - fn depth(self: *Self) usize { - if (self.stored_depth) |d| { - return d; - } - - self.stored_depth = self.root.depth() - 1; - return self.stored_depth.?; - } - - fn iter(self: *Self, bit: u1) anyerror!?valueType { - var node = self.iterNode; - - var nextNode: ?*Node = null; - - if (bit == 1) { - nextNode = node.left; - } else { - nextNode = node.right; - } - - if (nextNode) |new_node| { - if (new_node.value) |value| { - self.iterNode = self.root; - return value; - } else { - self.iterNode = new_node; - return null; - } - } - - return error.InvalidBitSequence; - } - - fn nextBitW(self: *Self, bitw: *BitWalker) !valueType { - while (true) { - if (try self.iter(try bitw.bitWalk())) |value| - return value; - } - } - - fn valid(self: *Self) bool { - return self.root.valid(); - } - - fn deinit(self: *Self) void { - self.root.deinit(); - } - }; -} - -fn create_dynamic_graph(walker: *BitWalker, len_to_read: usize, allocator: mem.Allocator) !HuffmanGraph(u64) { - const list: [19]u8 = .{ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 }; - - const lens: []u8 = try allocator.alloc(u8, 19); - defer allocator.free(lens); - mem.set(u8, lens, 0); - - for (0..len_to_read) |i| { - lens[list[i]] = @intCast(u8, try walker.walk(3)); - } - - var builder = try GraphBuilder(u64).init(allocator, 19, 0, 8); - defer builder.deinit(); - - while (!builder.done()) { - builder.addValue(builder.i, lens[builder.i]); - } - - return try builder.build(); -} - -fn GraphBuilder(comptime T: type) type { - return struct { - const Self = @This(); - - allocator: mem.Allocator, - values: []T, - lens: []usize, - bl_count: []usize, - size: usize, - max: usize = 0, - i: usize = 0, - - pub fn init(allocator: mem.Allocator, size: usize, default: T, maxlen: usize) !Self { - var values = try allocator.alloc(T, size); - mem.set(T, values, default); - - var lens = try allocator.alloc(usize, size); - mem.set(usize, lens, 0); - - var bl_count = try allocator.alloc(usize, maxlen); - mem.set(usize, bl_count, 0); - - return Self{ - .allocator = allocator, - .bl_count = bl_count, - .size = size, - .values = values, - .lens = lens, - }; - } - - pub fn skip(self: *Self, len: usize) void { - self.i += len; - } - - pub fn done(self: *Self) bool { - return self.i >= self.size; - } - - pub fn addValue(self: *Self, value: T, len: usize) void { - if (len > self.max) { - self.max = len; - } - self.values[self.i] = value; - self.lens[self.i] = len; - self.bl_count[len] += 1; - self.i += 1; - } - - pub fn build(self: *Self) !HuffmanGraph(T) { - self.bl_count[0] = 0; - - var next_code = try self.allocator.alloc(usize, self.max + 1); - defer self.allocator.free(next_code); - std.mem.set(usize, next_code, 0); - - var code: usize = 0; - for (1..(self.max + 1)) |bits| { - code = @shlExact(code + self.bl_count[bits - 1], 1); - next_code[bits] = code; - } - - var codes = try self.allocator.alloc(u64, self.size); - defer self.allocator.free(codes); - std.mem.set(usize, codes, 0); - - for (0..self.size) |n| { - var len = self.lens[n]; - if (len != 0) { - codes[n] = next_code[len]; - next_code[len] += 1; - } - } - - var graph = try HuffmanGraph(T).init(self.allocator); - errdefer graph.deinit(); - - for (0..self.size) |i| { - if (self.lens[i] == 0) - continue; - try graph.addValue(codes[i], self.lens[i], self.values[i]); - } - - if (!graph.valid()) - return error.graph_not_valid; - - return graph; - } - - pub fn buildWithDynamic(self: *Self, bitw: *BitWalker, graph: *HuffmanGraph(u64)) !HuffmanGraph(T) { - while (!self.done()) { - var v = try graph.nextBitW(bitw); - - if (v == 16) { - return error.not_implemented; - } else if (v == 17) { - self.skip(try bitw.walk(3) + 3); - } else if (v == 18) { - self.skip(try bitw.walk(7) + 11); - } else { - self.addValue(self.i, v); - } - } - - return try self.build(); - } - - pub fn deinit(self: *Self) void { - self.allocator.free(self.values); - self.allocator.free(self.lens); - self.allocator.free(self.bl_count); - } - }; -} - -fn get_len_value(bitw: *BitWalker, len_code: usize) !usize { - var extra_bits: usize = switch (len_code) { - 257...264 => 0, - 265...268 => 1, - 269...272 => 2, - 273...277 => 3, - 278...280 => 4, - 281...284 => 5, - 285 => 0, - else => unreachable, - }; - - var to_add = try bitw.walk(extra_bits); - - var base_value: usize = switch (len_code) { - 257...264 => 3 + len_code - 257, - 265...268 => 11 + (len_code - 265) * 2, - 269...272 => 19 + (len_code - 269) * 3, - 273...277 => 35 + (len_code - 273) * 7, - 278...280 => 67 + (len_code - 278) * 15, - 281...284 => 131 + (len_code - 281) * 31, - 285 => 0, - else => unreachable, - }; - - return base_value + to_add; -} - -fn get_dist_value(bitw: *BitWalker, dist_graph: *HuffmanGraph(u64)) !usize { - var dist = try dist_graph.nextBitW(bitw); - - var extra_bits: usize = switch (dist) { - 0...3 => 0, - 4...29 => (dist / 2) - 1, - else => unreachable, - }; - - var to_add = try bitw.walk(extra_bits); - - var base_value: usize = switch (dist) { - 0...3 => dist + 1, - 4...29 => std.math.pow(usize, 2, dist / 2) + (std.math.pow(usize, 2, (dist / 2) - 1) * (dist % 2)) + 1, - else => unreachable, - }; - - return base_value + to_add; -} - -const BlockData = struct { - const Self = @This(); - - bitw: *BitWalker, - allocator: mem.Allocator, - - last: bool, - blockType: u8, - - literal_graph: ?HuffmanGraph(u64) = null, - dist_graph: ?HuffmanGraph(u64) = null, - output: *[]u8, - - fn init(allocator: mem.Allocator, bitw: *BitWalker, output: *[]u8) !Self { - return Self{ - .bitw = bitw, - .allocator = allocator, - .last = try bitw.walk(1) == 1, - .blockType = @intCast(u8, try bitw.walk(2)), - .output = output, - }; - } - - fn uncompress(self: *Self, start_place: usize) !usize { - return switch (self.blockType) { - 1 => blk: { - try self.fixed(); - break :blk 0; - }, - 2 => self.dynamic_huffman(start_place), - 3 => error.block_type_error, - else => unreachable, - }; - } - - fn fixed(self: *Self) !void { - var litBuilder = try GraphBuilder(u64).init(self.allocator, 287, 0, 10); - defer litBuilder.deinit(); - - for (0..144) |i| { - //try lit.addValue(0b00110000 + i, 8, i); - litBuilder.addValue(i, 8); - } - for (144..256) |i| { - //try lit.addValue(0b001100000 + i, 9, i); - litBuilder.addValue(i, 9); - } - for (256..280) |i| { - //try lit.addValue(0b0000000 + i, 7, i); - litBuilder.addValue(i, 7); - } - for (280..287) |i| { - litBuilder.addValue(i, 8); - //try lit.addValue(0b11000000 + i, 8, i); - } - - var lit = try litBuilder.build(); - - self.literal_graph = lit; - - lit.print(); - - // var dist = try HuffmanGraph(u64).init(self.allocator); - - // for (0..144) |i| { - // try lit.addValue(0b00110000 + i, 8, i); - // } - // for (144..256) |i| { - // try lit.addValue(0b001100000 + i, 9, i); - // } - // for (256..280) |i| { - // try lit.addValue(0b0000000 + i, 7, i); - // } - // for (280..287) |i| { - // try lit.addValue(0b11000000 + i, 8, i); - // } - - // self.literal_graph = lit; - - //TODO dist - - var bitw = self.bitw; - - while (true) { - var lastRead = try lit.nextBitW(bitw); - if (lastRead == 256) { - break; - } else if (lastRead > 256) { - utils.printf("❓({})", .{lastRead}); - //var dist = try self.dist_graph.?.nextBitW(bitw); - //utils.printf("<{}>", .{dist}); - return error.not_implemented; - } else if (lastRead < 256) { - utils.printf("{c}", .{@intCast(u8, lastRead)}); - } else { - unreachable; - } - } - - return error.todo; - } - - fn dynamic_huffman(self: *Self, start_place: usize) !usize { - var bitw = self.bitw; - - var number_of_literal_codes: u32 = @intCast(u32, try bitw.walk(5)) + 257; - var number_of_dist_codes = try bitw.walk(5) + 1; - var number_of_length_codes = try bitw.walk(4) + 4; - - var dynamic_graph = try create_dynamic_graph(bitw, number_of_length_codes, self.allocator); - defer dynamic_graph.deinit(); - - var builder = try GraphBuilder(u64).init(self.allocator, number_of_literal_codes, 0, 16); - // destory either the 1st or the 2nd graph - defer builder.deinit(); - - self.literal_graph = try builder.buildWithDynamic(bitw, &dynamic_graph); - var graph = self.literal_graph.?; - - // Destory the first builder - builder.deinit(); - - builder = try GraphBuilder(u64).init(self.allocator, number_of_dist_codes, 0, 16); - self.dist_graph = try builder.buildWithDynamic(bitw, &dynamic_graph); - - var lastRead: u64 = 0; - - var i: usize = start_place; - while (true) { - lastRead = try graph.nextBitW(bitw); - if (lastRead == 256) { - break; - } else if (lastRead > 256) { - var len = try get_len_value(bitw, lastRead); - var dist = try get_dist_value(bitw, &self.dist_graph.?); - - var pos: usize = i - dist; - for (0..len) |j| { - self.output.ptr[i] = self.output.ptr[pos + j]; - i += 1; - } - } else if (lastRead < 256) { - self.output.ptr[i] = @intCast(u8, lastRead); - i += 1; - } else { - unreachable; - } - } - - return i; - } - - fn deinit(self: *Self) void { - if (self.literal_graph != null) { - self.literal_graph.?.deinit(); - } - if (self.dist_graph != null) { - self.dist_graph.?.deinit(); - } - } -}; - -const LOCAL_FILE_HEADER_SIGNATURE = 0x04034b50; -const ZipFileHeader = struct { - version: u16, - general: u16, - compression_method: u16, - last_mod_time: u16, - last_mod_date: u16, - crc_32: u32, - compressed_size: u32, - uncompressed_size: u32, - file_name_length: u16, - extra_field_length: u16, - - file_name: []u8, - extra_field: []u8, - compressed_content: []u8, - uncompressed_content: []u8, - decompressed: bool, - - allocator: std.mem.Allocator, - - const Self = @This(); - - fn init(allocator: std.mem.Allocator, reader: std.fs.File.Reader) !Self { - if (try reader.readInt(u32, .Big) == LOCAL_FILE_HEADER_SIGNATURE) { - return error.InvalidError; - } - - var self = Self{ - .allocator = allocator, - .version = try reader.readInt(u16, .Little), - .general = try reader.readInt(u16, .Little), - .compression_method = try reader.readInt(u16, .Little), - .last_mod_time = try reader.readInt(u16, .Little), - .last_mod_date = try reader.readInt(u16, .Little), - .crc_32 = try reader.readInt(u32, .Little), - .compressed_size = try reader.readInt(u32, .Little), - .uncompressed_size = try reader.readInt(u32, .Little), - .file_name_length = try reader.readInt(u16, .Little), - .extra_field_length = try reader.readInt(u16, .Little), - .file_name = undefined, - .extra_field = undefined, - .compressed_content = undefined, - .uncompressed_content = undefined, - .decompressed = false, - }; - - self.file_name = try allocator.alloc(u8, self.file_name_length); - self.extra_field = try allocator.alloc(u8, self.extra_field_length); - self.compressed_content = try allocator.alloc(u8, self.compressed_size); - - _ = try reader.read(self.file_name); - _ = try reader.read(self.extra_field); - _ = try reader.read(self.compressed_content); - - return self; - } - - fn extract(self: *Self) !void { - // already decompressed - if (self.decompressed) return; - - // already decompressed - if (self.compression_method == 0) { - return error.uncompressed_file; - } - - if (self.compression_method != 8) { - return error.unsuported_compression_method; - } - - self.uncompressed_content = try self.allocator.alloc(u8, self.uncompressed_size); - errdefer self.allocator.free(self.uncompressed_content); - mem.set(u8, self.uncompressed_content, 0); - - var bitw = try BitWalker.init(&self.compressed_content); - - var is_last = false; - var output_place: usize = 0; - - while (!is_last) { - var block = try BlockData.init(self.allocator, &bitw, &self.uncompressed_content); - defer block.deinit(); - - is_last = block.last; - - output_place = try block.uncompress(output_place); - } - - self.decompressed = true; - } - - fn deinit(self: *Self) void { - self.allocator.free(self.file_name); - self.allocator.free(self.extra_field); - self.allocator.free(self.compressed_content); - if (self.decompressed) { - self.allocator.free(self.uncompressed_content); - } - } -}; - pub fn main() !void { var args = std.process.args(); @@ -694,7 +68,10 @@ pub fn main() !void { var reader = file.reader(); - var first_file = try ZipFileHeader.init(allocator, reader); + var zip_file = try ZipFile.init(allocator, reader); + //defer zip_file.deinit(); + + var first_file = try zip_file.readFile(); defer first_file.deinit(); if (!std.mem.eql(u8, first_file.file_name, "mimetype")) { @@ -706,11 +83,13 @@ pub fn main() !void { } while (true) { - var second_file = try ZipFileHeader.init(allocator, reader); + var second_file = try zip_file.readFile(); defer second_file.deinit(); - try second_file.extract(); - print("G: {s}", .{second_file.file_name}); - print("xml stuff:\n{s}", .{second_file.uncompressed_content}); + print("File_Path: {s}", .{second_file.file_name}); + if (!mem.endsWith(u8, second_file.file_name, ".jpeg") and !mem.endsWith(u8, second_file.file_name, ".jpg")) { + print("Data:\n{s}", .{second_file.uncompressed_content}); + print("END File_Path: {s}", .{second_file.file_name}); + } } }