chore: updated the huffman stuff
This commit is contained in:
parent
e099a7fdda
commit
a80e954aa3
305
src/main.zig
305
src/main.zig
@ -3,169 +3,33 @@ const mem = std.mem;
|
|||||||
const stdout = std.io.getStdOut().writer();
|
const stdout = std.io.getStdOut().writer();
|
||||||
const stderr = std.io.getStdErr().writer();
|
const stderr = std.io.getStdErr().writer();
|
||||||
|
|
||||||
fn print(comptime str: []const u8, params: anytype) void {
|
const utils = @import("utils.zig");
|
||||||
stdout.print(str ++ "\n", params) catch {};
|
const print = utils.print;
|
||||||
}
|
const exit = utils.exit;
|
||||||
|
const pErr = utils.pErr;
|
||||||
|
|
||||||
fn pErr(comptime str: []const u8, params: anytype) void {
|
const walkers = @import("walker.zig");
|
||||||
stderr.print(str ++ "\n", params) catch {};
|
const BitWalker = walkers.BitWalker;
|
||||||
}
|
const BitWalkerUint = walkers.BitWalkerUint;
|
||||||
|
|
||||||
fn exit(comptime str: []const u8, params: anytype, exitCode: u8) void {
|
|
||||||
pErr(str, params);
|
|
||||||
std.os.exit(exitCode);
|
|
||||||
}
|
|
||||||
|
|
||||||
fn usage() void {
|
fn usage() void {
|
||||||
pErr("reader <path>", .{});
|
pErr("reader <path>", .{});
|
||||||
exit("", .{}, 1);
|
exit("", .{}, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn BitWalkerUint(T: anytype, size: usize, reverse: bool) type {
|
|
||||||
var typeInfo = @typeInfo(T);
|
|
||||||
|
|
||||||
if (typeInfo == std.builtin.Type.Int) {
|
|
||||||
@compileError("This needs to be a int");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (typeInfo.Int.is_signed) {
|
|
||||||
@compileError("The integer needs to be unsigned");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (typeInfo.Int.bits < size) {
|
|
||||||
@compileError("Int size needs to be the same or grater than the size");
|
|
||||||
}
|
|
||||||
|
|
||||||
return struct {
|
|
||||||
value: T,
|
|
||||||
// TODO this is probably wrong
|
|
||||||
in_byte_position: u8,
|
|
||||||
|
|
||||||
const Self = @This();
|
|
||||||
|
|
||||||
fn init(value: T) Self {
|
|
||||||
var start_value = 0;
|
|
||||||
if (reverse) {
|
|
||||||
start_value = size;
|
|
||||||
}
|
|
||||||
return Self{
|
|
||||||
.t = value,
|
|
||||||
.in_byte_position = size,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
fn walkBit(self: *Self) ?u1 {
|
|
||||||
if (reverse) {
|
|
||||||
if (self.in_byte_position == 0)
|
|
||||||
return null;
|
|
||||||
} else {
|
|
||||||
if (self.in_byte_position > size - 1)
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
var mask = 1;
|
|
||||||
|
|
||||||
for (0..self.in_byte_position) |_| {
|
|
||||||
mask = @shlExact(mask, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (reverse) {
|
|
||||||
self.in_byte_position -= 1;
|
|
||||||
} else {
|
|
||||||
self.in_byte_position += 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
return self.value & mask;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
const BitWalker = struct {
|
|
||||||
const Self = @This();
|
|
||||||
|
|
||||||
data: *[]u8,
|
|
||||||
position: usize = 0,
|
|
||||||
in_byte_position: u3 = 0,
|
|
||||||
direction: bool = false,
|
|
||||||
|
|
||||||
fn init(data: *[]u8, direction: bool) Self {
|
|
||||||
return Self{
|
|
||||||
.data = data,
|
|
||||||
.direction = direction,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO direction
|
|
||||||
fn walk(self: *Self, bits: u3) !u8 {
|
|
||||||
if (bits > 8 or bits == 0) return error.invalid_bit_number;
|
|
||||||
|
|
||||||
var byte = self.data.ptr[self.position];
|
|
||||||
|
|
||||||
// jumps over bytes
|
|
||||||
if (self.in_byte_position + @as(u4, bits) > 8) {
|
|
||||||
// Generate a mast that covers the last part of the old byte
|
|
||||||
var old_mask: u8 = 0;
|
|
||||||
var i: usize = 0;
|
|
||||||
while (i < 8 - @as(u4, self.in_byte_position)) : (i += 1) {
|
|
||||||
old_mask = @shlExact(old_mask, 1) + 1;
|
|
||||||
}
|
|
||||||
old_mask = @shlExact(old_mask, self.in_byte_position);
|
|
||||||
|
|
||||||
var next_byte = self.data.ptr[self.position + 1];
|
|
||||||
var new_byte_pos: u3 = @intCast(u3, @as(u4, bits) - (8 - @as(u4, self.in_byte_position)));
|
|
||||||
|
|
||||||
var new_mask: u8 = 0;
|
|
||||||
var j: usize = 0;
|
|
||||||
while (j < new_byte_pos) : (j += 1) {
|
|
||||||
new_mask = @shlExact(new_mask, 1) + 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
var result = @shrExact(byte & old_mask, self.in_byte_position) + @shlExact(next_byte & new_mask, @intCast(u3, 8 - @as(u4, self.in_byte_position)));
|
|
||||||
|
|
||||||
//print("mask: {b}, new_mask: {b}", .{ old_mask, new_mask });
|
|
||||||
//print("here {b} {b}", .{ byte, old_mask });
|
|
||||||
//print("here_new {b} {b}", .{ next_byte, new_mask });
|
|
||||||
//print("result {}", .{result});
|
|
||||||
|
|
||||||
self.position += 1;
|
|
||||||
self.in_byte_position = new_byte_pos;
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Generate a mast that covers the last part of the old byte
|
|
||||||
var old_mask: u8 = 0;
|
|
||||||
var i: usize = 0;
|
|
||||||
while (i < bits) : (i += 1) {
|
|
||||||
old_mask = @shlExact(old_mask, 1) + 1;
|
|
||||||
}
|
|
||||||
old_mask = @shlExact(old_mask, self.in_byte_position);
|
|
||||||
|
|
||||||
const result = @shrExact(byte & old_mask, self.in_byte_position);
|
|
||||||
|
|
||||||
const sum = @intCast(u4, self.in_byte_position) + @intCast(u4, bits);
|
|
||||||
if (sum == 8) {
|
|
||||||
self.position += 1;
|
|
||||||
self.in_byte_position = 0;
|
|
||||||
} else {
|
|
||||||
self.in_byte_position += bits;
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
fn HuffmanGraph(comptime valueType: type) type {
|
fn HuffmanGraph(comptime valueType: type) type {
|
||||||
return struct {
|
return struct {
|
||||||
const Node = struct {
|
const Node = struct {
|
||||||
left: ?Node,
|
const NodeSelf = @This();
|
||||||
right: ?Node,
|
|
||||||
|
left: ?*NodeSelf,
|
||||||
|
right: ?*NodeSelf,
|
||||||
value: ?valueType,
|
value: ?valueType,
|
||||||
|
|
||||||
allocator: mem.Allocator,
|
allocator: mem.Allocator,
|
||||||
|
|
||||||
fn init(allocator: mem.Allocator) !*Node {
|
fn init(allocator: mem.Allocator) !*NodeSelf {
|
||||||
var node = try allocator.create(Node);
|
var node = try allocator.create(NodeSelf);
|
||||||
node.left = null;
|
node.left = null;
|
||||||
node.right = null;
|
node.right = null;
|
||||||
node.value = null;
|
node.value = null;
|
||||||
@ -174,18 +38,40 @@ fn HuffmanGraph(comptime valueType: type) type {
|
|||||||
return node;
|
return node;
|
||||||
}
|
}
|
||||||
|
|
||||||
fn deinit(self: *Node) void {
|
fn deinit(self: *NodeSelf) void {
|
||||||
if (self.left) |left| {
|
if (self.left) |left| {
|
||||||
left.deinit();
|
left.deinit();
|
||||||
}
|
}
|
||||||
if (self.right) |right| {
|
if (self.right) |right| {
|
||||||
right.deinit();
|
right.deinit();
|
||||||
}
|
}
|
||||||
self.allocator.destory(self);
|
self.allocator.destroy(self);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn depth(self: *Node) usize {
|
fn print(self: *NodeSelf, curDepth: usize, targetDepth: usize) void {
|
||||||
var d = 0;
|
if (curDepth != targetDepth) {
|
||||||
|
if (self.left) |l| {
|
||||||
|
l.print(curDepth + 1, targetDepth);
|
||||||
|
} else {
|
||||||
|
utils.printf(" , ", .{});
|
||||||
|
}
|
||||||
|
if (self.right) |r| {
|
||||||
|
r.print(curDepth + 1, targetDepth);
|
||||||
|
} else {
|
||||||
|
utils.printf(" . ", .{});
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (self.value) |v| {
|
||||||
|
utils.printf(" {any} ", .{v});
|
||||||
|
} else {
|
||||||
|
utils.printf(" _ ", .{});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn depth(self: *NodeSelf) usize {
|
||||||
|
var d: usize = 0;
|
||||||
if (self.right) |r| {
|
if (self.right) |r| {
|
||||||
d = r.depth();
|
d = r.depth();
|
||||||
}
|
}
|
||||||
@ -213,10 +99,17 @@ fn HuffmanGraph(comptime valueType: type) type {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
fn addValue(self: *Self, code: u64, size: usize, value: valueType) !void {
|
fn print(self: *Self) void {
|
||||||
var walker = BitWalkerUint(u64, size, true).init(code);
|
for (0..(self.depth() + 1)) |i| {
|
||||||
|
self.root.print(0, i);
|
||||||
|
utils.printf("\n", .{});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
var curNode: *Node = self.node;
|
fn addValue(self: *Self, code: u64, size: u8, value: valueType) !void {
|
||||||
|
var walker = try BitWalkerUint(u64, true).init(code, size);
|
||||||
|
|
||||||
|
var curNode: *Node = self.root;
|
||||||
|
|
||||||
while (walker.walkBit()) |bit| {
|
while (walker.walkBit()) |bit| {
|
||||||
if (bit == 1) {
|
if (bit == 1) {
|
||||||
@ -295,23 +188,21 @@ const DynamicDecoder = struct {
|
|||||||
var bl_count: [7]u3 = .{ 0, 0, 0, 0, 0, 0, 0 };
|
var bl_count: [7]u3 = .{ 0, 0, 0, 0, 0, 0, 0 };
|
||||||
var max: u8 = 0;
|
var max: u8 = 0;
|
||||||
|
|
||||||
for (0..(len_to_read + 4)) |i| {
|
for (0..len_to_read) |i| {
|
||||||
var data: u3 = @intCast(u3, try walker.walk(3));
|
var data: u3 = @intCast(u3, try walker.walk(3));
|
||||||
lenList[i] = data;
|
lenList[i] = data;
|
||||||
if (data == 0) {
|
if (data == 0) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
bl_count[data - 1] += 1;
|
bl_count[data] += 1;
|
||||||
if (data > max) {
|
if (data > max) {
|
||||||
max = data;
|
max = data;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var next_code: [19]u64 = .{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
|
var next_code: [19]u64 = .{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
|
||||||
|
|
||||||
var code: u64 = 0;
|
var code: u64 = 0;
|
||||||
bl_count[0] = 0;
|
for (1..(max + 1)) |bits| {
|
||||||
for (1..max) |bits| {
|
|
||||||
code = @shlExact(code + bl_count[bits - 1], 1);
|
code = @shlExact(code + bl_count[bits - 1], 1);
|
||||||
next_code[bits] = code;
|
next_code[bits] = code;
|
||||||
}
|
}
|
||||||
@ -348,6 +239,75 @@ const DynamicDecoder = struct {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const BlockData = struct {
|
||||||
|
const Self = @This();
|
||||||
|
|
||||||
|
bitw: *BitWalker,
|
||||||
|
allocator: mem.Allocator,
|
||||||
|
|
||||||
|
last: bool,
|
||||||
|
blockType: u8,
|
||||||
|
|
||||||
|
fn init(allocator: mem.Allocator, bitw: *BitWalker) !Self {
|
||||||
|
return Self{
|
||||||
|
.bitw = bitw,
|
||||||
|
.allocator = allocator,
|
||||||
|
.last = try bitw.walk(1) == 1,
|
||||||
|
.blockType = try bitw.walk(2),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
fn uncompress(self: *Self) !void {
|
||||||
|
if (self.blockType != 2) {
|
||||||
|
return error.unsuported_block_type;
|
||||||
|
}
|
||||||
|
|
||||||
|
try self.dynamic_huffman;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
fn dynamic_huffman() !void {
|
||||||
|
var bitw = self.bitw;
|
||||||
|
|
||||||
|
var number_of_literal_codes: u32 = @as(u32, try bitw.walk(5)) + 257;
|
||||||
|
var number_of_dist_codes = try bitw.walk(5) + 1;
|
||||||
|
var number_of_length_codes = try bitw.walk(4) + 4;
|
||||||
|
|
||||||
|
print("number of literal codes: {}", .{number_of_literal_codes});
|
||||||
|
print("number of dist codes: {}", .{number_of_dist_codes});
|
||||||
|
print("number_of_length_codes: {}", .{number_of_length_codes});
|
||||||
|
|
||||||
|
var dynamic_decoder = try DynamicDecoder.init(bitw, number_of_length_codes, self.allocator);
|
||||||
|
defer dynamic_decoder.deinit();
|
||||||
|
|
||||||
|
dynamic_decoder.graph.print();
|
||||||
|
|
||||||
|
var code_len: usize = 0;
|
||||||
|
while (code_len < number_of_literal_codes) {
|
||||||
|
var decode_value = try dynamic_decoder.graph.iter(try bitw.bitWalk());
|
||||||
|
while (decode_value == null) {
|
||||||
|
decode_value = try dynamic_decoder.graph.iter(try bitw.bitWalk());
|
||||||
|
}
|
||||||
|
|
||||||
|
print("Test {any}", .{decode_value});
|
||||||
|
|
||||||
|
if (decode_value.? == 16) {
|
||||||
|
return error.not_implemented;
|
||||||
|
} else if (decode_value.? == 17) {
|
||||||
|
code_len += try bitw.walk(3);
|
||||||
|
} else if (decode_value.? == 18) {
|
||||||
|
code_len += try bitw.walk(7);
|
||||||
|
} else {
|
||||||
|
code_len += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return error.todo;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_len_code() void {}
|
||||||
|
};
|
||||||
|
|
||||||
const LOCAL_FILE_HEADER_SIGNATURE = 0x04034b50;
|
const LOCAL_FILE_HEADER_SIGNATURE = 0x04034b50;
|
||||||
const ZipFileHeader = struct {
|
const ZipFileHeader = struct {
|
||||||
version: u16,
|
version: u16,
|
||||||
@ -424,32 +384,15 @@ const ZipFileHeader = struct {
|
|||||||
|
|
||||||
var bitw = BitWalker.init(&self.compressed_content, false);
|
var bitw = BitWalker.init(&self.compressed_content, false);
|
||||||
|
|
||||||
var lastBlock = try bitw.walk(1) == 1;
|
var block = try BlockData.init(self.allocator, &bitw);
|
||||||
var blockType = try bitw.walk(2);
|
|
||||||
|
|
||||||
if (lastBlock) {
|
if (block.last) {
|
||||||
print("last block", .{});
|
print("last block", .{});
|
||||||
} else {
|
} else {
|
||||||
print("not last block", .{});
|
print("not last block", .{});
|
||||||
}
|
}
|
||||||
|
|
||||||
print("block_type: {}", .{blockType});
|
try block.uncompress();
|
||||||
|
|
||||||
if (blockType != 2) {
|
|
||||||
return error.unsuported_block_type;
|
|
||||||
}
|
|
||||||
|
|
||||||
var number_of_literal_codes = try bitw.walk(5);
|
|
||||||
var number_of_dist_codes = try bitw.walk(5);
|
|
||||||
var number_of_length_codes = try bitw.walk(4);
|
|
||||||
|
|
||||||
print("number of literal codes: {}", .{number_of_literal_codes});
|
|
||||||
print("number of dist codes: {}", .{number_of_dist_codes});
|
|
||||||
print("number_of_length_codes: {}", .{number_of_length_codes});
|
|
||||||
|
|
||||||
var dynamic_decoder = try DynamicDecoder.init(&bitw, number_of_length_codes, self.allocator);
|
|
||||||
|
|
||||||
_ = dynamic_decoder;
|
|
||||||
|
|
||||||
self.decompressed = true;
|
self.decompressed = true;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user