chore: updated the huffman stuff

This commit is contained in:
Andre Henriques 2023-06-23 21:38:02 +01:00
parent e099a7fdda
commit a80e954aa3

View File

@ -3,169 +3,33 @@ const mem = std.mem;
const stdout = std.io.getStdOut().writer();
const stderr = std.io.getStdErr().writer();
/// Writes one formatted line to standard output.
///
/// `str` is a comptime format string and `params` the matching argument
/// tuple; a trailing newline is appended to the format. Write failures are
/// discarded on purpose: console output is best-effort.
fn print(comptime str: []const u8, params: anytype) void {
    const line_format = str ++ "\n";
    stdout.print(line_format, params) catch {};
}
const utils = @import("utils.zig");
const print = utils.print;
const exit = utils.exit;
const pErr = utils.pErr;
/// Writes one formatted line to standard error.
///
/// `str` is a comptime format string and `params` the matching argument
/// tuple; a trailing newline is appended to the format. Write failures are
/// discarded on purpose: diagnostics are best-effort.
fn pErr(comptime str: []const u8, params: anytype) void {
    const line_format = str ++ "\n";
    stderr.print(line_format, params) catch {};
}
/// Reports a formatted message on standard error, then terminates the
/// process with `exitCode`.
///
/// The message is emitted through `pErr` before `std.os.exit` is called, so
/// it is written before the process ends.
fn exit(comptime str: []const u8, params: anytype, exitCode: u8) void {
    // Message first: nothing after the exit call ever runs.
    pErr(str, params);

    const status: u8 = exitCode;
    std.os.exit(status);
}
const walkers = @import("walker.zig");
const BitWalker = walkers.BitWalker;
const BitWalkerUint = walkers.BitWalkerUint;
/// Prints the command-line synopsis on standard error and terminates the
/// process with exit status 1.
fn usage() void {
    const failure_status: u8 = 1;

    pErr("reader <path>", .{});
    // `exit` with an empty message still emits a blank line before quitting.
    exit("", .{}, failure_status);
}
/// Builds a walker type that hands out the low `size` bits of an unsigned
/// integer of type `T`, one bit per call.
///
/// * `T`       - unsigned integer type that stores the bits.
/// * `size`    - number of bits to walk; must not exceed the width of `T`.
/// * `reverse` - when true the bits are produced from bit `size - 1` down to
///               bit 0, otherwise from bit 0 up to bit `size - 1`.
///
/// Misuse (non-integer `T`, signed `T`, `size` wider than `T`) is rejected at
/// compile time.
fn BitWalkerUint(comptime T: type, comptime size: usize, comptime reverse: bool) type {
    const type_info = @typeInfo(T);
    if (type_info != .Int) {
        @compileError("This needs to be a int");
    }
    if (type_info.Int.signedness == .signed) {
        @compileError("The integer needs to be unsigned");
    }
    if (type_info.Int.bits < size) {
        @compileError("Int size needs to be the same or grater than the size");
    }

    return struct {
        /// Integer whose bits are being walked.
        value: T,
        /// Cursor into `value`.
        /// Forward: index of the next bit to return.
        /// Reverse: number of bits still to return (the next bit is at
        /// `in_byte_position - 1`).
        in_byte_position: u8,

        const Self = @This();

        /// Creates a walker positioned before the first bit of `value`.
        fn init(value: T) Self {
            const start: u8 = if (reverse) size else 0;
            return Self{
                .value = value,
                .in_byte_position = start,
            };
        }

        /// Returns the next bit, or null once `size` bits have been produced.
        fn walkBit(self: *Self) ?u1 {
            if (reverse) {
                if (self.in_byte_position == 0) return null;
                // Step first so the bit at `size - 1` is read first and
                // bit 0 is read last.
                self.in_byte_position -= 1;
                return self.bitAt(self.in_byte_position);
            }

            if (self.in_byte_position >= size) return null;
            const bit = self.bitAt(self.in_byte_position);
            self.in_byte_position += 1;
            return bit;
        }

        /// Extracts the bit at `index` (0 = least significant) from `value`.
        fn bitAt(self: Self, index: u8) u1 {
            // `std.math.shr` accepts any shift width, so no cast to the
            // log2 integer type of `T` is required.
            const shifted = std.math.shr(T, self.value, index);
            return if ((shifted & 1) == 1) 1 else 0;
        }
    };
}
/// Reads a byte buffer as a stream of bits, consuming each byte from its
/// least-significant bit upwards.
const BitWalker = struct {
    const Self = @This();

    /// Buffer being read. Borrowed from the caller; `walk` only reads it.
    data: *[]u8,
    /// Index of the byte that holds the next unread bit.
    position: usize = 0,
    /// Offset (0-7) of the next unread bit inside `data[position]`.
    in_byte_position: u3 = 0,
    /// Requested walking direction. Stored, but `walk` does not consult it yet.
    direction: bool = false,

    /// Creates a walker positioned on the first bit of `data`.
    fn init(data: *[]u8, direction: bool) Self {
        return Self{
            .data = data,
            .direction = direction,
        };
    }

    // TODO direction
    /// Consumes the next `bits` bits (1-7) and returns them right-aligned;
    /// the first bit consumed ends up as bit 0 of the result.
    ///
    /// Errors:
    /// * `error.invalid_bit_number`     - `bits` is 0.
    /// * `error.unexpected_end_of_data` - the request reaches past the end
    ///   of the buffer. The walker is left unchanged in that case.
    fn walk(self: *Self, bits: u3) !u8 {
        // `bits` is a u3, so the upper bound (7) is enforced by the type.
        if (bits == 0) return error.invalid_bit_number;

        const bytes = self.data.*;
        if (self.position >= bytes.len) return error.unexpected_end_of_data;

        const offset = self.in_byte_position;
        const end_bit = @as(u4, offset) + bits;

        // Unread part of the current byte, moved down to bit 0.
        var window: u8 = bytes[self.position] >> offset;
        if (end_bit > 8) {
            // The request spills into the following byte: place its low
            // bits right above the ones taken from the current byte.
            if (self.position + 1 >= bytes.len) return error.unexpected_end_of_data;
            window |= std.math.shl(u8, bytes[self.position + 1], 8 - @as(u4, offset));
        }
        const mask = (@as(u8, 1) << bits) - 1;

        if (end_bit >= 8) self.position += 1;
        // u3 wrapping addition is exactly "modulo 8".
        self.in_byte_position = offset +% bits;
        return window & mask;
    }
};
fn HuffmanGraph(comptime valueType: type) type {
return struct {
const Node = struct {
left: ?Node,
right: ?Node,
const NodeSelf = @This();
left: ?*NodeSelf,
right: ?*NodeSelf,
value: ?valueType,
allocator: mem.Allocator,
fn init(allocator: mem.Allocator) !*Node {
var node = try allocator.create(Node);
fn init(allocator: mem.Allocator) !*NodeSelf {
var node = try allocator.create(NodeSelf);
node.left = null;
node.right = null;
node.value = null;
@ -174,18 +38,40 @@ fn HuffmanGraph(comptime valueType: type) type {
return node;
}
fn deinit(self: *Node) void {
fn deinit(self: *NodeSelf) void {
if (self.left) |left| {
left.deinit();
}
if (self.right) |right| {
right.deinit();
}
self.allocator.destory(self);
self.allocator.destroy(self);
}
fn depth(self: *Node) usize {
var d = 0;
fn print(self: *NodeSelf, curDepth: usize, targetDepth: usize) void {
if (curDepth != targetDepth) {
if (self.left) |l| {
l.print(curDepth + 1, targetDepth);
} else {
utils.printf(" , ", .{});
}
if (self.right) |r| {
r.print(curDepth + 1, targetDepth);
} else {
utils.printf(" . ", .{});
}
return;
}
if (self.value) |v| {
utils.printf(" {any} ", .{v});
} else {
utils.printf(" _ ", .{});
}
}
fn depth(self: *NodeSelf) usize {
var d: usize = 0;
if (self.right) |r| {
d = r.depth();
}
@ -213,10 +99,17 @@ fn HuffmanGraph(comptime valueType: type) type {
};
}
fn addValue(self: *Self, code: u64, size: usize, value: valueType) !void {
var walker = BitWalkerUint(u64, size, true).init(code);
fn print(self: *Self) void {
for (0..(self.depth() + 1)) |i| {
self.root.print(0, i);
utils.printf("\n", .{});
}
}
var curNode: *Node = self.node;
fn addValue(self: *Self, code: u64, size: u8, value: valueType) !void {
var walker = try BitWalkerUint(u64, true).init(code, size);
var curNode: *Node = self.root;
while (walker.walkBit()) |bit| {
if (bit == 1) {
@ -295,23 +188,21 @@ const DynamicDecoder = struct {
var bl_count: [7]u3 = .{ 0, 0, 0, 0, 0, 0, 0 };
var max: u8 = 0;
for (0..(len_to_read + 4)) |i| {
for (0..len_to_read) |i| {
var data: u3 = @intCast(u3, try walker.walk(3));
lenList[i] = data;
if (data == 0) {
continue;
}
bl_count[data - 1] += 1;
bl_count[data] += 1;
if (data > max) {
max = data;
}
}
var next_code: [19]u64 = .{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
var code: u64 = 0;
bl_count[0] = 0;
for (1..max) |bits| {
for (1..(max + 1)) |bits| {
code = @shlExact(code + bl_count[bits - 1], 1);
next_code[bits] = code;
}
@ -348,6 +239,75 @@ const DynamicDecoder = struct {
}
};
/// Header and decoding state of one compressed block read through a shared
/// `BitWalker`.
const BlockData = struct {
    const Self = @This();

    /// Bit reader positioned right after this block's 3 header bits.
    bitw: *BitWalker,
    /// Allocator handed on to `DynamicDecoder.init`.
    allocator: mem.Allocator,
    /// True when the first header bit was 1.
    last: bool,
    /// Value of the 2-bit block type field; only type 2 is handled.
    blockType: u8,

    /// Reads the block header (1 flag bit, then 2 type bits) from `bitw`.
    /// Any error from `bitw.walk` is propagated.
    fn init(allocator: mem.Allocator, bitw: *BitWalker) !Self {
        // Read in explicit statements so the order of the two reads does not
        // depend on struct-literal field ordering.
        const is_last = (try bitw.walk(1)) == 1;
        const block_type = try bitw.walk(2);
        return Self{
            .bitw = bitw,
            .allocator = allocator,
            .last = is_last,
            .blockType = block_type,
        };
    }

    /// Decodes the block body.
    /// Returns `error.unsuported_block_type` for every block type except 2.
    fn uncompress(self: *Self) !void {
        if (self.blockType != 2) {
            return error.unsuported_block_type;
        }
        try self.dynamic_huffman();
    }

    /// Reads the three code counts, builds the code-length decoder and walks
    /// the literal/length code lengths.
    ///
    /// Still a work in progress: symbol 16 yields `error.not_implemented`,
    /// the decoded lengths are only counted (not stored), and the function
    /// always finishes with `error.todo`.
    fn dynamic_huffman(self: *Self) !void {
        const bitw = self.bitw;

        const number_of_literal_codes: u32 = @as(u32, try bitw.walk(5)) + 257;
        const number_of_dist_codes = (try bitw.walk(5)) + 1;
        const number_of_length_codes = (try bitw.walk(4)) + 4;

        print("number of literal codes: {}", .{number_of_literal_codes});
        print("number of dist codes: {}", .{number_of_dist_codes});
        print("number_of_length_codes: {}", .{number_of_length_codes});

        var dynamic_decoder = try DynamicDecoder.init(bitw, number_of_length_codes, self.allocator);
        defer dynamic_decoder.deinit();

        dynamic_decoder.graph.print();

        var code_len: usize = 0;
        while (code_len < number_of_literal_codes) {
            // Feed single bits to the graph until it yields a symbol.
            var decode_value = try dynamic_decoder.graph.iter(try bitw.bitWalk());
            while (decode_value == null) {
                decode_value = try dynamic_decoder.graph.iter(try bitw.bitWalk());
            }

            print("Test {any}", .{decode_value});

            switch (decode_value.?) {
                16 => return error.not_implemented,
                // RFC 1951 3.2.7: the extra bits are an offset from the
                // minimum repeat count (3 for symbol 17, 11 for symbol 18).
                17 => code_len += @as(usize, try bitw.walk(3)) + 3,
                18 => code_len += @as(usize, try bitw.walk(7)) + 11,
                else => code_len += 1,
            }
        }

        return error.todo;
    }

    /// Placeholder; currently does nothing.
    fn read_len_code() void {}
};
const LOCAL_FILE_HEADER_SIGNATURE = 0x04034b50;
const ZipFileHeader = struct {
version: u16,
@ -424,32 +384,15 @@ const ZipFileHeader = struct {
var bitw = BitWalker.init(&self.compressed_content, false);
var lastBlock = try bitw.walk(1) == 1;
var blockType = try bitw.walk(2);
var block = try BlockData.init(self.allocator, &bitw);
if (lastBlock) {
if (block.last) {
print("last block", .{});
} else {
print("not last block", .{});
}
print("block_type: {}", .{blockType});
if (blockType != 2) {
return error.unsuported_block_type;
}
var number_of_literal_codes = try bitw.walk(5);
var number_of_dist_codes = try bitw.walk(5);
var number_of_length_codes = try bitw.walk(4);
print("number of literal codes: {}", .{number_of_literal_codes});
print("number of dist codes: {}", .{number_of_dist_codes});
print("number_of_length_codes: {}", .{number_of_length_codes});
var dynamic_decoder = try DynamicDecoder.init(&bitw, number_of_length_codes, self.allocator);
_ = dynamic_decoder;
try block.uncompress();
self.decompressed = true;
}