some part of zip still broken

This commit is contained in:
Andre Henriques 2023-06-24 23:15:22 +01:00
parent 8820f239b8
commit 54f6673a03

View File

@ -12,645 +12,19 @@ const walkers = @import("walker.zig");
const BitWalker = walkers.BitWalker; const BitWalker = walkers.BitWalker;
const BitWalkerUint = walkers.BitWalkerUint; const BitWalkerUint = walkers.BitWalkerUint;
const huffmanstuff = @import("HuffmanGraph.zig");
const HuffmanGraph = huffmanstuff.HuffmanGraph;
const GraphBuilder = huffmanstuff.GraphBuilder;
const create_dynamic_graph = huffmanstuff.create_dynamic_graph;
const zipstuff = @import("./zip.zig");
const ZipFile = zipstuff.ZipFile;
fn usage() void { fn usage() void {
pErr("reader <path>", .{}); pErr("reader <path>", .{});
exit("", .{}, 1); exit("", .{}, 1);
} }
/// Returns a binary prefix-code tree type whose leaves carry `valueType` symbols.
///
/// While walking a code, a 1 bit descends into `left` and a 0 bit descends
/// into `right`.
fn HuffmanGraph(comptime valueType: type) type {
    return struct {
        const Self = @This();

        /// A single tree node. Leaves hold `value`; inner nodes only link to children.
        const Node = struct {
            const NodeSelf = @This();

            left: ?*NodeSelf,
            right: ?*NodeSelf,
            value: ?valueType,
            allocator: mem.Allocator,

            /// Allocates a node with no children and no value.
            /// The node remembers `allocator` so it can destroy itself later.
            fn init(allocator: mem.Allocator) !*NodeSelf {
                const fresh = try allocator.create(NodeSelf);
                fresh.* = NodeSelf{
                    .left = null,
                    .right = null,
                    .value = null,
                    .allocator = allocator,
                };
                return fresh;
            }

            /// Recursively destroys both subtrees and then this node.
            fn deinit(self: *NodeSelf) void {
                if (self.left) |child| child.deinit();
                if (self.right) |child| child.deinit();
                self.allocator.destroy(self);
            }

            /// Prints every node that sits `targetDepth` levels below the node
            /// the walk started from; missing children print a placeholder.
            fn print(self: *NodeSelf, curDepth: usize, targetDepth: usize) void {
                if (curDepth == targetDepth) {
                    if (self.value) |v| {
                        utils.printf(" {any} ", .{v});
                    } else {
                        utils.printf(" _ ", .{});
                    }
                    return;
                }

                const below = curDepth + 1;
                if (self.left) |l| {
                    l.print(below, targetDepth);
                } else {
                    utils.printf(" , ", .{});
                }
                if (self.right) |r| {
                    r.print(below, targetDepth);
                } else {
                    utils.printf(" . ", .{});
                }
            }

            /// Number of nodes on the longest path from this node down to a leaf.
            fn depth(self: *NodeSelf) usize {
                const right_levels: usize = if (self.right) |r| r.depth() else 0;
                const left_levels: usize = if (self.left) |l| l.depth() else 0;
                const deepest = if (left_levels > right_levels) left_levels else right_levels;
                return deepest + 1;
            }

            /// A subtree is valid when every node with a value is a leaf and
            /// every node without a value has at least one child.
            fn valid(self: *NodeSelf) bool {
                const is_leaf = self.right == null and self.left == null;
                if (self.value != null) return is_leaf;
                if (is_leaf) return false;

                const right_ok = if (self.right) |r| r.valid() else true;
                if (!right_ok) return false;
                const left_ok = if (self.left) |l| l.valid() else true;
                return left_ok;
            }
        };

        root: *Node,
        allocator: mem.Allocator,
        stored_depth: ?usize = null,
        iterNode: *Node,

        /// Creates a graph made of a single empty root node.
        fn init(allocator: mem.Allocator) !Self {
            const top = try Node.init(allocator);
            return Self{
                .root = top,
                .iterNode = top,
                .allocator = allocator,
            };
        }

        /// Dumps the tree one level per output line, starting at the root.
        fn print(self: *Self) void {
            const levels = self.depth() + 1;
            var level: usize = 0;
            while (level < levels) : (level += 1) {
                self.root.print(0, level);
                utils.printf("\n", .{});
            }
        }

        /// Inserts `value` at the position described by the `size` bits of
        /// `code`, creating intermediate nodes on demand.
        fn addValue(self: *Self, code: u64, size: usize, value: valueType) !void {
            var bits = try BitWalkerUint(u64).init(code, size, true);
            var cursor: *Node = self.root;
            while (bits.walkBit()) |bit| {
                // A 1 bit selects the left link, a 0 bit the right one.
                const slot: *?*Node = if (bit == 1) &cursor.left else &cursor.right;
                if (slot.* == null) {
                    slot.* = try Node.init(self.allocator);
                }
                cursor = slot.*.?;
            }
            cursor.value = value;
            // The cached depth may be stale now.
            self.stored_depth = null;
        }

        /// Depth of the tree below the root; cached until the next `addValue`.
        fn depth(self: *Self) usize {
            if (self.stored_depth == null) {
                self.stored_depth = self.root.depth() - 1;
            }
            return self.stored_depth.?;
        }

        /// Advances the decoding cursor by one bit.
        ///
        /// Returns the symbol when a node carrying a value is reached (the cursor
        /// then returns to the root), `null` when more bits are needed, and
        /// `error.InvalidBitSequence` when the bit leads to a missing child.
        fn iter(self: *Self, bit: u1) anyerror!?valueType {
            const branch: ?*Node = if (bit == 1) self.iterNode.left else self.iterNode.right;
            const reached = branch orelse return error.InvalidBitSequence;

            if (reached.value) |symbol| {
                self.iterNode = self.root;
                return symbol;
            }
            self.iterNode = reached;
            return null;
        }

        /// Pulls bits from `bitw` until a complete symbol has been decoded.
        fn nextBitW(self: *Self, bitw: *BitWalker) !valueType {
            while (true) {
                const decoded = try self.iter(try bitw.bitWalk());
                if (decoded) |symbol| return symbol;
            }
        }

        /// Checks the whole tree with `Node.valid`.
        fn valid(self: *Self) bool {
            return self.root.valid();
        }

        /// Frees every node of the tree.
        fn deinit(self: *Self) void {
            self.root.deinit();
        }
    };
}
/// Builds the Huffman graph for the 19-symbol code-length alphabet.
///
/// Reads `len_to_read` 3-bit lengths from `walker`, storing each one at the
/// symbol given by the fixed permutation below; symbols that are not read keep
/// length 0. The graph is then built from those lengths with `GraphBuilder`.
/// The caller owns the returned graph and must `deinit` it.
fn create_dynamic_graph(walker: *BitWalker, len_to_read: usize, allocator: mem.Allocator) !HuffmanGraph(u64) {
    const symbol_order = [19]u8{ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };

    const code_lengths: []u8 = try allocator.alloc(u8, symbol_order.len);
    defer allocator.free(code_lengths);
    for (code_lengths) |*entry| {
        entry.* = 0;
    }

    var read: usize = 0;
    while (read < len_to_read) : (read += 1) {
        code_lengths[symbol_order[read]] = @intCast(u8, try walker.walk(3));
    }

    var builder = try GraphBuilder(u64).init(allocator, symbol_order.len, 0, 8);
    defer builder.deinit();

    // Each symbol is its own value; the builder advances `i` on every add.
    while (!builder.done()) {
        const symbol = builder.i;
        builder.addValue(symbol, code_lengths[symbol]);
    }

    return try builder.build();
}
/// Returns a builder type that collects one code length per symbol slot and
/// turns them into a `HuffmanGraph(T)` with canonically assigned codes.
fn GraphBuilder(comptime T: type) type {
    return struct {
        const Self = @This();

        allocator: mem.Allocator,
        /// Value stored for each slot.
        values: []T,
        /// Code length of each slot; 0 means the slot gets no code.
        lens: []usize,
        /// Number of slots seen per code length.
        bl_count: []usize,
        /// Total number of slots.
        size: usize,
        /// Largest code length added so far.
        max: usize = 0,
        /// Index of the next slot to fill.
        i: usize = 0,

        /// Allocates a builder for `size` slots.
        ///
        /// Every slot starts with value `default` and length 0. `maxlen` is the
        /// number of distinct lengths tracked, so lengths passed to `addValue`
        /// must be strictly smaller than `maxlen`. Release with `deinit`.
        pub fn init(allocator: mem.Allocator, size: usize, default: T, maxlen: usize) !Self {
            const values = try allocator.alloc(T, size);
            // Release earlier buffers if a later allocation fails.
            errdefer allocator.free(values);
            mem.set(T, values, default);

            const lens = try allocator.alloc(usize, size);
            errdefer allocator.free(lens);
            mem.set(usize, lens, 0);

            const bl_count = try allocator.alloc(usize, maxlen);
            mem.set(usize, bl_count, 0);

            return Self{
                .allocator = allocator,
                .bl_count = bl_count,
                .size = size,
                .values = values,
                .lens = lens,
            };
        }

        /// Leaves the next `len` slots at their default (length 0).
        pub fn skip(self: *Self, len: usize) void {
            self.i += len;
        }

        /// True once every slot has been filled or skipped.
        pub fn done(self: *Self) bool {
            return self.i >= self.size;
        }

        /// Stores `value` with code length `len` in the current slot and
        /// advances to the next one.
        pub fn addValue(self: *Self, value: T, len: usize) void {
            if (len > self.max) {
                self.max = len;
            }
            self.values[self.i] = value;
            self.lens[self.i] = len;
            self.bl_count[len] += 1;
            self.i += 1;
        }

        /// Assigns codes to all slots with a non-zero length and builds the graph.
        ///
        /// The first code of each length is `(first code of the previous length
        /// + number of codes of the previous length) << 1`; slots sharing a
        /// length receive consecutive codes in slot order.
        /// Returns `error.graph_not_valid` when the resulting tree fails
        /// `HuffmanGraph.valid`. The caller owns the returned graph.
        pub fn build(self: *Self) !HuffmanGraph(T) {
            // Length 0 marks unused slots; they must not shift the codes.
            self.bl_count[0] = 0;

            const next_code = try self.allocator.alloc(usize, self.max + 1);
            defer self.allocator.free(next_code);
            std.mem.set(usize, next_code, 0);

            var code: usize = 0;
            for (1..(self.max + 1)) |bits| {
                code = @shlExact(code + self.bl_count[bits - 1], 1);
                next_code[bits] = code;
            }

            const codes = try self.allocator.alloc(u64, self.size);
            defer self.allocator.free(codes);
            // Element type matches the slice (the original passed `usize` here).
            std.mem.set(u64, codes, 0);

            for (0..self.size) |n| {
                const len = self.lens[n];
                if (len != 0) {
                    codes[n] = next_code[len];
                    next_code[len] += 1;
                }
            }

            var graph = try HuffmanGraph(T).init(self.allocator);
            errdefer graph.deinit();

            for (0..self.size) |n| {
                if (self.lens[n] == 0)
                    continue;
                try graph.addValue(codes[n], self.lens[n], self.values[n]);
            }

            if (!graph.valid())
                return error.graph_not_valid;

            return graph;
        }

        /// Fills the remaining slots by decoding code lengths with `graph`
        /// from `bitw`, then calls `build`.
        ///
        /// Decoded symbols below 16 are literal code lengths. The repeat
        /// symbols follow RFC 1951 section 3.2.7:
        ///   16 - copy the previous length 3..6 times (2 extra bits),
        ///   17 - 3..10 zero lengths (3 extra bits),
        ///   18 - 11..138 zero lengths (7 extra bits).
        ///
        /// Limitations: each table is built on its own, so a run that extends
        /// past the end of this table is cut off at the end, and a 16 symbol
        /// with no previous slot in this table returns `error.not_implemented`.
        pub fn buildWithDynamic(self: *Self, bitw: *BitWalker, graph: *HuffmanGraph(u64)) !HuffmanGraph(T) {
            while (!self.done()) {
                const v = try graph.nextBitW(bitw);
                if (v == 16) {
                    // There is no earlier slot in this table to copy from.
                    if (self.i == 0) return error.not_implemented;
                    // Skipped slots keep length 0, so this also repeats zeros.
                    const previous_len = self.lens[self.i - 1];
                    const repeat = (try bitw.walk(2)) + 3;
                    var copied: usize = 0;
                    while (copied < repeat and !self.done()) : (copied += 1) {
                        self.addValue(self.i, previous_len);
                    }
                } else if (v == 17) {
                    self.skip((try bitw.walk(3)) + 3);
                } else if (v == 18) {
                    self.skip((try bitw.walk(7)) + 11);
                } else {
                    self.addValue(self.i, v);
                }
            }
            return try self.build();
        }

        /// Frees the buffers allocated by `init`.
        pub fn deinit(self: *Self) void {
            self.allocator.free(self.values);
            self.allocator.free(self.lens);
            self.allocator.free(self.bl_count);
        }
    };
}
/// Converts a DEFLATE length symbol (257..285) into a match length.
///
/// Looks up the base length and extra-bit count from the RFC 1951
/// section 3.2.5 table, reads that many extra bits from `bitw` and adds them
/// to the base. Returns `error.invalid_length_code` for symbols outside
/// 257..285 instead of treating them as unreachable, since the symbol comes
/// from the compressed stream.
fn get_len_value(bitw: *BitWalker, len_code: usize) !usize {
    const first_code: usize = 257;
    // Indexed by `len_code - 257`.
    const base_lengths = [29]usize{
        3,  4,  5,  6,   7,   8,   9,   10,
        11, 13, 15, 17,  19,  23,  27,  31,
        35, 43, 51, 59,  67,  83,  99,  115,
        131, 163, 195, 227, 258,
    };
    const extra_bit_counts = [29]usize{
        0, 0, 0, 0, 0, 0, 0, 0,
        1, 1, 1, 1, 2, 2, 2, 2,
        3, 3, 3, 3, 4, 4, 4, 4,
        5, 5, 5, 5, 0,
    };

    if (len_code < first_code or len_code - first_code >= base_lengths.len) {
        return error.invalid_length_code;
    }
    const index = len_code - first_code;

    // Called even when the count is 0, as the previous implementation did.
    const to_add = try bitw.walk(extra_bit_counts[index]);
    return base_lengths[index] + to_add;
}
/// Decodes one distance symbol with `dist_graph` and converts it into a
/// back-reference distance.
///
/// Symbols 0..3 map to distances 1..4 with no extra bits. Symbols 4..29 use
/// `symbol / 2 - 1` extra bits on top of a base of
/// `2^(symbol/2) + (symbol % 2) * 2^(symbol/2 - 1) + 1`.
/// Returns `error.invalid_distance_code` for any other symbol; the value is
/// read from the compressed stream, so it must not be assumed unreachable.
fn get_dist_value(bitw: *BitWalker, dist_graph: *HuffmanGraph(u64)) !usize {
    const dist = try dist_graph.nextBitW(bitw);

    const extra_bits: usize = switch (dist) {
        0...3 => 0,
        4...29 => (dist / 2) - 1,
        else => return error.invalid_distance_code,
    };

    const to_add = try bitw.walk(extra_bits);

    const base_value: usize = switch (dist) {
        0...3 => dist + 1,
        4...29 => std.math.pow(usize, 2, dist / 2) + (std.math.pow(usize, 2, (dist / 2) - 1) * (dist % 2)) + 1,
        // Already rejected above; kept as an error so no path is undefined.
        else => return error.invalid_distance_code,
    };

    return base_value + to_add;
}
/// One compressed block read from a `BitWalker`, plus the Huffman graphs
/// needed to decode it into a shared output buffer.
const BlockData = struct {
    const Self = @This();

    bitw: *BitWalker,
    allocator: mem.Allocator,
    /// Value of the 1-bit "last block" flag read in `init`.
    last: bool,
    /// Value of the 2-bit block type read in `init`.
    blockType: u8,
    literal_graph: ?HuffmanGraph(u64) = null,
    dist_graph: ?HuffmanGraph(u64) = null,
    /// Destination buffer; decoded bytes are written into the existing slice.
    output: *[]u8,

    /// Reads the block header (1 bit last flag, then 2 bits block type) from `bitw`.
    fn init(allocator: mem.Allocator, bitw: *BitWalker, output: *[]u8) !Self {
        return Self{
            .bitw = bitw,
            .allocator = allocator,
            .last = (try bitw.walk(1)) == 1,
            .blockType = @intCast(u8, try bitw.walk(2)),
            .output = output,
        };
    }

    /// Decodes the block into `output` starting at index `start_place` and
    /// returns the index following the last byte written.
    ///
    /// Block type 0 is not supported and returns `error.not_implemented`
    /// (it used to hit `unreachable`); type 3 returns `error.block_type_error`.
    fn uncompress(self: *Self, start_place: usize) !usize {
        return switch (self.blockType) {
            0 => error.not_implemented,
            1 => blk: {
                try self.fixed();
                break :blk 0;
            },
            2 => self.dynamic_huffman(start_place),
            3 => error.block_type_error,
            // `blockType` is read from two bits, so larger values cannot occur.
            else => unreachable,
        };
    }

    /// Work-in-progress decoder for blocks with the fixed code lengths.
    ///
    /// Builds the literal graph, prints it, then prints decoded literals
    /// instead of storing them. Length symbols return `error.not_implemented`
    /// and reaching the end-of-block symbol returns `error.todo`, so this
    /// function currently never succeeds.
    fn fixed(self: *Self) !void {
        var litBuilder = try GraphBuilder(u64).init(self.allocator, 287, 0, 10);
        defer litBuilder.deinit();

        for (0..144) |i| {
            litBuilder.addValue(i, 8);
        }
        for (144..256) |i| {
            litBuilder.addValue(i, 9);
        }
        for (256..280) |i| {
            litBuilder.addValue(i, 7);
        }
        for (280..287) |i| {
            litBuilder.addValue(i, 8);
        }

        // Stored on `self` so `deinit` releases it on every exit path.
        self.literal_graph = try litBuilder.build();
        const lit = &self.literal_graph.?;

        lit.print();

        // TODO: build the distance graph.

        const bitw = self.bitw;
        while (true) {
            const lastRead = try lit.nextBitW(bitw);
            if (lastRead == 256) break;
            if (lastRead > 256) {
                utils.printf("❓({})", .{lastRead});
                return error.not_implemented;
            }
            utils.printf("{c}", .{@intCast(u8, lastRead)});
        }

        return error.todo;
    }

    /// Decodes a block whose literal/length and distance graphs are described
    /// in the block header.
    ///
    /// Writes into `output` starting at `start_place` and returns the index
    /// after the last byte written. Returns `error.output_overflow` when the
    /// block would write past the end of `output` and `error.invalid_distance`
    /// when a back-reference points before the start of `output`.
    fn dynamic_huffman(self: *Self, start_place: usize) !usize {
        const bitw = self.bitw;

        const number_of_literal_codes: u32 = @intCast(u32, try bitw.walk(5)) + 257;
        const number_of_dist_codes = (try bitw.walk(5)) + 1;
        const number_of_length_codes = (try bitw.walk(4)) + 4;

        var dynamic_graph = try create_dynamic_graph(bitw, number_of_length_codes, self.allocator);
        defer dynamic_graph.deinit();

        // Each builder lives in its own scope so it is released exactly once,
        // including when the second `init` fails.
        {
            var literal_builder = try GraphBuilder(u64).init(self.allocator, number_of_literal_codes, 0, 16);
            defer literal_builder.deinit();
            self.literal_graph = try literal_builder.buildWithDynamic(bitw, &dynamic_graph);
        }
        {
            var dist_builder = try GraphBuilder(u64).init(self.allocator, number_of_dist_codes, 0, 16);
            defer dist_builder.deinit();
            self.dist_graph = try dist_builder.buildWithDynamic(bitw, &dynamic_graph);
        }

        const literal_graph = &self.literal_graph.?;
        const out = self.output.*;

        var i: usize = start_place;
        while (true) {
            const symbol = try literal_graph.nextBitW(bitw);

            // 256 marks the end of the block.
            if (symbol == 256) break;

            if (symbol < 256) {
                if (i >= out.len) return error.output_overflow;
                out[i] = @intCast(u8, symbol);
                i += 1;
                continue;
            }

            const len = try get_len_value(bitw, symbol);
            const dist = try get_dist_value(bitw, &self.dist_graph.?);

            if (dist > i) return error.invalid_distance;
            if (i > out.len or len > out.len - i) return error.output_overflow;

            // Copy byte by byte: source and destination may overlap when
            // `dist < len`, and bytes written earlier in the run are re-read.
            const pos = i - dist;
            for (0..len) |j| {
                out[i] = out[pos + j];
                i += 1;
            }
        }

        return i;
    }

    /// Releases whichever graphs were built for this block.
    fn deinit(self: *Self) void {
        if (self.literal_graph) |*graph| {
            graph.deinit();
        }
        if (self.dist_graph) |*graph| {
            graph.deinit();
        }
    }
};
/// Signature expected at the start of every local file header, compared
/// against the first four bytes read as a little-endian `u32`.
const LOCAL_FILE_HEADER_SIGNATURE = 0x04034b50;

/// One local file entry of a zip archive: the fixed header fields, the
/// variable-length name and extra field, and the (de)compressed payload.
const ZipFileHeader = struct {
    version: u16,
    general: u16,
    compression_method: u16,
    last_mod_time: u16,
    last_mod_date: u16,
    crc_32: u32,
    compressed_size: u32,
    uncompressed_size: u32,
    file_name_length: u16,
    extra_field_length: u16,
    file_name: []u8,
    extra_field: []u8,
    compressed_content: []u8,
    /// Only valid once `decompressed` is true.
    uncompressed_content: []u8,
    decompressed: bool,
    allocator: std.mem.Allocator,

    const Self = @This();

    /// Reads one local file header and its compressed payload from `reader`.
    ///
    /// Returns `error.InvalidError` when the entry does not start with
    /// `LOCAL_FILE_HEADER_SIGNATURE`, and `error.EndOfStream` when the stream
    /// ends before the name, extra field or payload is complete.
    /// On success the caller must call `deinit`; on failure nothing is leaked.
    fn init(allocator: std.mem.Allocator, reader: std.fs.File.Reader) !Self {
        // The signature is little-endian like every other header field, and a
        // mismatch (not a match) is the error case.
        if ((try reader.readInt(u32, .Little)) != LOCAL_FILE_HEADER_SIGNATURE) {
            return error.InvalidError;
        }

        var self = Self{
            .allocator = allocator,
            .version = try reader.readInt(u16, .Little),
            .general = try reader.readInt(u16, .Little),
            .compression_method = try reader.readInt(u16, .Little),
            .last_mod_time = try reader.readInt(u16, .Little),
            .last_mod_date = try reader.readInt(u16, .Little),
            .crc_32 = try reader.readInt(u32, .Little),
            .compressed_size = try reader.readInt(u32, .Little),
            .uncompressed_size = try reader.readInt(u32, .Little),
            .file_name_length = try reader.readInt(u16, .Little),
            .extra_field_length = try reader.readInt(u16, .Little),
            .file_name = undefined,
            .extra_field = undefined,
            .compressed_content = undefined,
            .uncompressed_content = undefined,
            .decompressed = false,
        };

        self.file_name = try allocator.alloc(u8, self.file_name_length);
        errdefer allocator.free(self.file_name);
        self.extra_field = try allocator.alloc(u8, self.extra_field_length);
        errdefer allocator.free(self.extra_field);
        self.compressed_content = try allocator.alloc(u8, self.compressed_size);
        errdefer allocator.free(self.compressed_content);

        // `readNoEof` fills each buffer completely or fails, so a short read
        // can no longer leave part of a buffer uninitialised.
        try reader.readNoEof(self.file_name);
        try reader.readNoEof(self.extra_field);
        try reader.readNoEof(self.compressed_content);

        return self;
    }

    /// Decompresses `compressed_content` into `uncompressed_content`.
    ///
    /// Does nothing when already decompressed. Returns
    /// `error.uncompressed_file` for compression method 0 and
    /// `error.unsuported_compression_method` for anything other than 8.
    fn extract(self: *Self) !void {
        // Already decompressed.
        if (self.decompressed) return;

        // Method 0 means the payload is stored without compression.
        if (self.compression_method == 0) {
            return error.uncompressed_file;
        }

        if (self.compression_method != 8) {
            return error.unsuported_compression_method;
        }

        self.uncompressed_content = try self.allocator.alloc(u8, self.uncompressed_size);
        errdefer self.allocator.free(self.uncompressed_content);
        mem.set(u8, self.uncompressed_content, 0);

        var bitw = try BitWalker.init(&self.compressed_content);

        var is_last = false;
        var output_place: usize = 0;

        // Decode block after block until one carries the "last" flag.
        while (!is_last) {
            var block = try BlockData.init(self.allocator, &bitw, &self.uncompressed_content);
            defer block.deinit();

            is_last = block.last;

            output_place = try block.uncompress(output_place);
        }

        self.decompressed = true;
    }

    /// Frees all buffers owned by this entry.
    fn deinit(self: *Self) void {
        self.allocator.free(self.file_name);
        self.allocator.free(self.extra_field);
        self.allocator.free(self.compressed_content);
        // Only allocated (and kept) after a successful `extract`.
        if (self.decompressed) {
            self.allocator.free(self.uncompressed_content);
        }
    }
};
pub fn main() !void { pub fn main() !void {
var args = std.process.args(); var args = std.process.args();
@ -694,7 +68,10 @@ pub fn main() !void {
var reader = file.reader(); var reader = file.reader();
var first_file = try ZipFileHeader.init(allocator, reader); var zip_file = try ZipFile.init(allocator, reader);
//defer zip_file.deinit();
var first_file = try zip_file.readFile();
defer first_file.deinit(); defer first_file.deinit();
if (!std.mem.eql(u8, first_file.file_name, "mimetype")) { if (!std.mem.eql(u8, first_file.file_name, "mimetype")) {
@ -706,11 +83,13 @@ pub fn main() !void {
} }
while (true) { while (true) {
var second_file = try ZipFileHeader.init(allocator, reader); var second_file = try zip_file.readFile();
defer second_file.deinit(); defer second_file.deinit();
try second_file.extract(); print("File_Path: {s}", .{second_file.file_name});
print("G: {s}", .{second_file.file_name}); if (!mem.endsWith(u8, second_file.file_name, ".jpeg") and !mem.endsWith(u8, second_file.file_name, ".jpg")) {
print("xml stuff:\n{s}", .{second_file.uncompressed_content}); print("Data:\n{s}", .{second_file.uncompressed_content});
print("END File_Path: {s}", .{second_file.file_name});
}
} }
} }