//! ebook-reader/src/zip.zig
//! Minimal zip container + DEFLATE (RFC 1951) decompressor.
//! Last touched: 2023-06-25 12:59:13 +01:00
const std = @import("std");
const mem = std.mem;
const utils = @import("utils.zig");
const print = utils.print;
const walkers = @import("walker.zig");
const BitWalker = walkers.BitWalker;
const BitWalkerUint = walkers.BitWalkerUint;
const huffmanstuff = @import("HuffmanGraph.zig");
const HuffmanGraph = huffmanstuff.HuffmanGraph;
const GraphBuilder = huffmanstuff.GraphBuilder;
const create_dynamic_graph = huffmanstuff.create_dynamic_graph;
/// Decode the full match length of a back-reference from a
/// literal/length symbol (RFC 1951 section 3.2.5).
///
/// `len_code` must be in 257...285; the matching number of extra bits
/// is consumed from `bitw` and added to the code's base length.
/// Symbols 286/287 never appear in well-formed data, but since the
/// symbol is decoded from untrusted input we report them as an error
/// instead of `unreachable` (which is UB in ReleaseFast).
fn get_len_value(bitw: *BitWalker, len_code: usize) !usize {
    var extra_bits: usize = switch (len_code) {
        257...264 => 0,
        265...268 => 1,
        269...272 => 2,
        273...276 => 3,
        277...280 => 4,
        281...284 => 5,
        285 => 0,
        // corrupt stream, not a programmer error
        else => return error.invalid_length_code,
    };
    var to_add = try bitw.walk(extra_bits);
    // Base lengths from the RFC 1951 length table; result = base + extra.
    var base_value: usize = switch (len_code) {
        257 => 3,
        258 => 4,
        259 => 5,
        260 => 6,
        261 => 7,
        262 => 8,
        263 => 9,
        264 => 10,
        265 => 11,
        266 => 13,
        267 => 15,
        268 => 17,
        269 => 19,
        270 => 23,
        271 => 27,
        272 => 31,
        273 => 35,
        274 => 43,
        275 => 51,
        276 => 59,
        277 => 67,
        278 => 83,
        279 => 99,
        280 => 115,
        281 => 131,
        282 => 163,
        283 => 195,
        284 => 227,
        285 => 258,
        else => unreachable, // already rejected by the first switch
    };
    return base_value + to_add;
}
/// Decode the full back-reference distance (RFC 1951 section 3.2.5):
/// read a distance symbol (0...29) via `dist_graph`, then consume its
/// extra bits from `bitw` and add them to the symbol's base distance.
///
/// Symbols 30/31 are reserved and only occur in corrupt input, so they
/// are reported as an error rather than hitting `unreachable` (UB in
/// release builds).
fn get_dist_value(bitw: *BitWalker, dist_graph: *HuffmanGraph(u64)) !usize {
    var dist = try dist_graph.nextBitW(bitw);
    var extra_bits: usize = switch (dist) {
        0...3 => 0,
        4...29 => (dist / 2) - 1,
        // corrupt stream, not a programmer error
        else => return error.invalid_distance_code,
    };
    var to_add = try bitw.walk(extra_bits);
    var base_value: usize = switch (dist) {
        0...3 => dist + 1,
        // Closed form of the RFC 1951 distance base table:
        //   base = 2^floor(d/2) + 2^(floor(d/2)-1) * (d % 2) + 1
        4...29 => std.math.pow(usize, 2, dist / 2) + (std.math.pow(usize, 2, (dist / 2) - 1) * (dist % 2)) + 1,
        else => unreachable, // already rejected by the first switch
    };
    return base_value + to_add;
}
/// One DEFLATE block inside a compressed zip entry.
///
/// `init` consumes the 3-bit block header (BFINAL flag + BTYPE) from
/// `bitw`; `uncompress` then inflates the payload into `output`.
/// `allOutput` holds the concatenated output of previously extracted
/// entries and is consulted when a back-reference reaches before the
/// start of the current entry's output (non-standard, but this reader
/// relies on it — see decode_symbols).
const BlockData = struct {
    const Self = @This();
    bitw: *BitWalker,
    allocator: mem.Allocator,
    // BFINAL: true if this is the last block of the stream
    last: bool,
    // BTYPE: 0 = stored, 1 = fixed Huffman, 2 = dynamic Huffman, 3 = invalid
    blockType: u8,
    literal_graph: ?HuffmanGraph(u64) = null,
    dist_graph: ?HuffmanGraph(u64) = null,
    // destination buffer for this entry's uncompressed bytes
    output: *[]u8,
    // all previously produced output (earlier entries)
    allOutput: *[]u8,
    fn init(allocator: mem.Allocator, bitw: *BitWalker, output: *[]u8, allOutput: *[]u8) !Self {
        return Self{
            .bitw = bitw,
            .allocator = allocator,
            .last = try bitw.walk(1) == 1,
            .blockType = @intCast(u8, try bitw.walk(2)),
            .output = output,
            .allOutput = allOutput,
        };
    }
    /// Decompress this block into `output` starting at `start_place`.
    /// Returns the index one past the last byte written.
    fn uncompress(self: *Self, start_place: usize) !usize {
        return switch (self.blockType) {
            0 => self.not_compressed(start_place),
            1 => self.fixed(start_place),
            2 => self.dynamic_huffman(start_place),
            3 => error.block_type_error,
            else => unreachable, // blockType was read from exactly 2 bits
        };
    }
    /// BTYPE 0: stored block.  After aligning to a byte boundary the
    /// layout is LEN (16 bits), NLEN (16 bits, one's complement of
    /// LEN), then LEN literal bytes.
    fn not_compressed(self: *Self, start_value: usize) !usize {
        var bitw = self.bitw;
        try bitw.nextByte();
        var size = try bitw.walk(16);
        // NLEN is 16 bits — the old walk(24) over-read by a byte and
        // desynchronized every stored block.  NLEN should equal ~LEN;
        // TODO(review): consider validating that here.
        var nsize = try bitw.walk(16);
        _ = nsize;
        var i = start_value;
        // Copy `size` bytes.  The end bound is relative to where this
        // block starts: the old `i < size` bound dropped bytes whenever
        // start_value > 0 (any block after the first).
        while (i < start_value + size) : (i += 1) {
            self.output.ptr[i] = @intCast(u8, try bitw.walk(8));
        }
        return i;
    }
    /// BTYPE 1: fixed Huffman codes (RFC 1951 section 3.2.6).
    fn fixed(self: *Self, start_value: usize) !usize {
        // Fixed literal/length code lengths:
        //   0-143 -> 8 bits, 144-255 -> 9, 256-279 -> 7, 280-287 -> 8.
        var litBuilder = try GraphBuilder(u64).init(self.allocator, 288, 0, 10);
        defer litBuilder.deinit();
        for (0..144) |i| {
            litBuilder.addValue(i, 8);
        }
        for (144..256) |i| {
            litBuilder.addValue(i, 9);
        }
        for (256..280) |i| {
            litBuilder.addValue(i, 7);
        }
        for (280..288) |i| {
            litBuilder.addValue(i, 8);
        }
        self.literal_graph = try litBuilder.build();
        // All 32 fixed distance codes are 5 bits wide.
        var distBuilder = try GraphBuilder(u64).init(self.allocator, 32, 0, 10);
        defer distBuilder.deinit();
        for (0..32) |i| {
            distBuilder.addValue(i, 5);
        }
        self.dist_graph = try distBuilder.build();
        return self.decode_symbols(start_value);
    }
    /// BTYPE 2: dynamic Huffman codes (RFC 1951 section 3.2.7).
    fn dynamic_huffman(self: *Self, start_place: usize) !usize {
        var bitw = self.bitw;
        var number_of_literal_codes: u32 = @intCast(u32, try bitw.walk(5)) + 257; // HLIT
        var number_of_dist_codes = try bitw.walk(5) + 1; // HDIST
        var number_of_length_codes = try bitw.walk(4) + 4; // HCLEN
        // Code-length code, used to decode the two real code tables below.
        var dynamic_graph = try create_dynamic_graph(bitw, number_of_length_codes, self.allocator);
        defer dynamic_graph.deinit();
        // One builder per table, each with its own defer — replaces the
        // old single-builder reuse that needed a manual mid-function
        // deinit to avoid a double free.
        var lit_builder = try GraphBuilder(u64).init(self.allocator, number_of_literal_codes, 0, 16);
        defer lit_builder.deinit();
        self.literal_graph = try lit_builder.buildWithDynamic(bitw, &dynamic_graph);
        var dist_builder = try GraphBuilder(u64).init(self.allocator, number_of_dist_codes, 0, 16);
        defer dist_builder.deinit();
        self.dist_graph = try dist_builder.buildWithDynamic(bitw, &dynamic_graph);
        return self.decode_symbols(start_place);
    }
    /// Shared symbol-decode loop for both Huffman block types (the old
    /// code duplicated this verbatim in fixed() and dynamic_huffman()).
    /// Requires literal_graph and dist_graph to be built.
    fn decode_symbols(self: *Self, start_value: usize) !usize {
        var bitw = self.bitw;
        var graph = &self.literal_graph.?;
        var i: usize = start_value;
        while (true) {
            var symbol = try graph.nextBitW(bitw);
            if (symbol == 256) {
                break; // end-of-block marker
            } else if (symbol < 256) {
                // literal byte
                self.output.ptr[i] = @intCast(u8, symbol);
                i += 1;
            } else {
                // back-reference: <length, distance> pair
                var len = try get_len_value(bitw, symbol);
                var dist = try get_dist_value(bitw, &self.dist_graph.?);
                var pos: isize = @intCast(isize, i) - @intCast(isize, dist);
                for (0..len) |j| {
                    var src = pos + @intCast(isize, j);
                    if (src < 0) {
                        // Reference reaches before this entry's output:
                        // read from the tail of allOutput.  The index is
                        // allOutput.len + src (src is negative) — the
                        // old `len - pos + j` flipped the sign and
                        // indexed past the end of allOutput.
                        self.output.ptr[i] = self.allOutput.ptr[@intCast(usize, @intCast(isize, self.allOutput.len) + src)];
                    } else {
                        // copy from earlier in this entry's own output
                        self.output.ptr[i] = self.output.ptr[@intCast(usize, src)];
                    }
                    i += 1;
                }
            }
        }
        return i;
    }
    /// Free the Huffman graphs built during decompression (if any).
    fn deinit(self: *Self) void {
        if (self.literal_graph) |*g| {
            g.deinit();
        }
        if (self.dist_graph) |*g| {
            g.deinit();
        }
    }
};
// Magic number "PK\x03\x04" (stored little-endian on disk) that begins
// every zip local file header — see the PKWARE APPNOTE, section 4.3.7.
const LOCAL_FILE_HEADER_SIGNATURE = 0x04034b50;
/// One zip local-file-header record plus its (de)compressed payload.
///
/// `init` parses the header and reads the compressed bytes; `extract`
/// inflates them into `uncompressed_content`.  All buffers are owned by
/// this struct and freed by `deinit`.
const ZipFileHeader = struct {
    version: u16,
    general: u16,
    compression_method: u16, // 0 = stored, 8 = deflate
    last_mod_time: u16,
    last_mod_date: u16,
    crc_32: u32,
    compressed_size: u32,
    uncompressed_size: u32,
    file_name_length: u16,
    extra_field_length: u16,
    file_name: []u8,
    extra_field: []u8,
    compressed_content: []u8,
    uncompressed_content: []u8,
    // true once extract() has populated uncompressed_content
    decompressed: bool,
    allocator: std.mem.Allocator,
    allOutput: *[]u8,
    const Self = @This();
    /// Parse one local file header from `reader` and slurp its name,
    /// extra field, and compressed payload.  Caller owns the result and
    /// must call deinit().
    fn init(allocator: std.mem.Allocator, reader: std.fs.File.Reader, allOutput: *[]u8) !Self {
        // The signature is stored little-endian ("PK\x03\x04").  The old
        // check read it big-endian AND inverted the comparison, so it
        // could never reject a bad header.
        if (try reader.readInt(u32, .Little) != LOCAL_FILE_HEADER_SIGNATURE) {
            return error.InvalidError;
        }
        var self = Self{
            .allocator = allocator,
            .version = try reader.readInt(u16, .Little),
            .general = try reader.readInt(u16, .Little),
            .compression_method = try reader.readInt(u16, .Little),
            .last_mod_time = try reader.readInt(u16, .Little),
            .last_mod_date = try reader.readInt(u16, .Little),
            .crc_32 = try reader.readInt(u32, .Little),
            .compressed_size = try reader.readInt(u32, .Little),
            .uncompressed_size = try reader.readInt(u32, .Little),
            .file_name_length = try reader.readInt(u16, .Little),
            .extra_field_length = try reader.readInt(u16, .Little),
            .file_name = undefined,
            .extra_field = undefined,
            .compressed_content = undefined,
            .uncompressed_content = undefined,
            .decompressed = false,
            .allOutput = allOutput,
        };
        // errdefer after each alloc so earlier buffers are not leaked
        // when a later alloc or read fails.
        self.file_name = try allocator.alloc(u8, self.file_name_length);
        errdefer allocator.free(self.file_name);
        self.extra_field = try allocator.alloc(u8, self.extra_field_length);
        errdefer allocator.free(self.extra_field);
        self.compressed_content = try allocator.alloc(u8, self.compressed_size);
        errdefer allocator.free(self.compressed_content);
        // read() may return fewer bytes than requested; readNoEof either
        // fills the whole buffer or returns an error.
        try reader.readNoEof(self.file_name);
        try reader.readNoEof(self.extra_field);
        try reader.readNoEof(self.compressed_content);
        return self;
    }
    /// Inflate compressed_content into uncompressed_content.
    /// Idempotent: a second call is a no-op.
    fn extract(self: *Self) !void {
        if (self.decompressed) return; // already done
        if (self.compression_method == 0) {
            // method 0 = stored: payload is already uncompressed
            self.uncompressed_content = try self.allocator.alloc(u8, self.uncompressed_size);
            mem.copy(u8, self.uncompressed_content, self.compressed_content);
            // Mark done so deinit() frees the buffer — the old code left
            // decompressed == false here, leaking uncompressed_content.
            self.decompressed = true;
            return;
        }
        if (self.compression_method != 8) {
            return error.unsuported_compression_method;
        }
        self.uncompressed_content = try self.allocator.alloc(u8, self.uncompressed_size);
        errdefer self.allocator.free(self.uncompressed_content);
        mem.set(u8, self.uncompressed_content, 0);
        var bitw = try BitWalker.init(&self.compressed_content);
        var is_last = false;
        var output_place: usize = 0;
        // Inflate DEFLATE blocks until one carries the BFINAL flag.
        while (!is_last) {
            var block = try BlockData.init(self.allocator, &bitw, &self.uncompressed_content, self.allOutput);
            defer block.deinit();
            is_last = block.last;
            output_place = try block.uncompress(output_place);
        }
        self.decompressed = true;
    }
    /// Free all buffers owned by this header.
    pub fn deinit(self: *Self) void {
        self.allocator.free(self.file_name);
        self.allocator.free(self.extra_field);
        self.allocator.free(self.compressed_content);
        // uncompressed_content exists only after a successful extract()
        if (self.decompressed) {
            self.allocator.free(self.uncompressed_content);
        }
    }
};
/// Sequential zip-archive reader.
///
/// Each readFile() call parses and extracts the next entry from the
/// underlying reader, and appends that entry's uncompressed bytes to
/// `allOutput` so later entries can back-reference earlier output.
pub const ZipFile = struct {
    const Self = @This();
    allocator: std.mem.Allocator,
    reader: std.fs.File.Reader,
    // concatenated uncompressed output of every entry read so far
    allOutput: []u8,
    /// Create a reader over an already-opened file; starts with an
    /// empty (zero-length, but allocated) output history.
    pub fn init(allocator: std.mem.Allocator, reader: std.fs.File.Reader) !Self {
        return Self{
            .allocator = allocator,
            .reader = reader,
            .allOutput = try allocator.alloc(u8, 0),
        };
    }
    /// Read and extract the next entry.  Caller owns the returned
    /// header and must call its deinit().
    pub fn readFile(self: *Self) !ZipFileHeader {
        var header = try ZipFileHeader.init(self.allocator, self.reader, &self.allOutput);
        errdefer header.deinit();
        try header.extract();
        // Grow the output history: old history ++ this entry's bytes.
        const combined = try mem.concat(self.allocator, u8, &.{ self.allOutput, header.uncompressed_content });
        self.allocator.free(self.allOutput);
        self.allOutput = combined;
        return header;
    }
    /// Free the accumulated output history.
    pub fn deinit(self: *Self) void {
        self.allocator.free(self.allOutput);
    }
};