more work on the huffman

This commit is contained in:
Andre Henriques 2023-06-14 20:58:56 +01:00
parent 99d8ce3484
commit e099a7fdda

View File

@ -21,8 +21,7 @@ fn usage() void {
exit("", .{}, 1); exit("", .{}, 1);
} }
fn BitWalkerUint(T: anytype) type { fn BitWalkerUint(T: anytype, size: usize, reverse: bool) type {
var typeInfo = @typeInfo(T); var typeInfo = @typeInfo(T);
if (typeInfo == std.builtin.Type.Int) { if (typeInfo == std.builtin.Type.Int) {
@ -33,21 +32,51 @@ fn BitWalkerUint(T: anytype) type {
@compileError("The integer needs to be unsigned"); @compileError("The integer needs to be unsigned");
} }
var size = typeInfo.Int.bits; if (typeInfo.Int.bits < size) {
@compileError("Int size needs to be the same or grater than the size");
}
return struct { return struct {
t: T, value: T,
// TODO this is probably wrong // TODO this is probably wrong
in_byte_position: u8, in_byte_position: u8,
const Self = @This(); const Self = @This();
fn init(value: T) Self { fn init(value: T) Self {
return Self { var start_value = 0;
if (reverse) {
start_value = size;
}
return Self{
.t = value, .t = value,
.in_byte_position = 0, .in_byte_position = size,
}; };
} }
/// Returns the next bit of `value`, or null once every one of the `size`
/// bits has been handed out.
///
/// In reverse mode `in_byte_position` is the number of bits still to be
/// read, so bits come out from index `in_byte_position - 1` down to index 0
/// (most significant of the `size` bits first). Otherwise
/// `in_byte_position` is the index of the next bit and counts up to `size`.
fn walkBit(self: *Self) ?u1 {
    const bit_index = if (reverse) rev: {
        if (self.in_byte_position == 0)
            return null;
        // Step down *before* reading: a walker that starts at `size` must
        // read bit `size - 1` first and bit 0 last.
        self.in_byte_position -= 1;
        break :rev self.in_byte_position;
    } else fwd: {
        // `>= size` instead of `> size - 1` so that a zero size cannot
        // underflow.
        if (self.in_byte_position >= size)
            return null;
        const current = self.in_byte_position;
        self.in_byte_position += 1;
        break :fwd current;
    };

    // Shift the wanted bit down to position 0 and keep only that bit, so
    // the result is always exactly 0 or 1.
    const shift = @intCast(std.math.Log2Int(T), bit_index);
    return @truncate(u1, self.value >> shift);
}
}; };
} }
@ -126,7 +155,7 @@ const BitWalker = struct {
} }
}; };
fn HuffmanGraph(valueType: type) type { fn HuffmanGraph(comptime valueType: type) type {
return struct { return struct {
const Node = struct { const Node = struct {
left: ?Node, left: ?Node,
@ -136,7 +165,6 @@ fn HuffmanGraph(valueType: type) type {
allocator: mem.Allocator, allocator: mem.Allocator,
fn init(allocator: mem.Allocator) !*Node { fn init(allocator: mem.Allocator) !*Node {
var node = try allocator.create(Node); var node = try allocator.create(Node);
node.left = null; node.left = null;
node.right = null; node.right = null;
@ -155,19 +183,96 @@ fn HuffmanGraph(valueType: type) type {
} }
self.allocator.destory(self); self.allocator.destory(self);
} }
/// Returns the number of nodes on the longest path from this node down to
/// a node without children, counting this node itself (a node with no
/// children has depth 1).
fn depth(self: *Node) usize {
    // Explicit `usize`: an untyped `var d = 0` would be a comptime_int,
    // which cannot be a runtime variable.
    var deepest_child: usize = 0;
    if (self.right) |r| {
        deepest_child = r.depth();
    }
    if (self.left) |l| {
        deepest_child = std.math.max(deepest_child, l.depth());
    }
    return deepest_child + 1;
}
}; };
root: Node, root: *Node,
allocator: mem.Allocator,
stored_depth: ?usize = null,
iterNode: *Node,
const Self = @This(); const Self = @This();
fn init(allocator: mem.Allocator) !Self { fn init(allocator: mem.Allocator) !Self {
var root = try Node.init(allocator); var root = try Node.init(allocator);
return Self { return Self{
root, .root = root,
.iterNode = root,
.allocator = allocator,
}; };
} }
/// Stores `value` at the node reached by following the low `size` bits of
/// `code` from the root, most significant of those bits first. A 1 bit
/// descends into `left`, a 0 bit into `right`; missing nodes along the way
/// are created with the graph's allocator.
///
/// Invalidates the cached depth.
///
/// Errors: `error.InvalidCodeLength` when `size` exceeds the 64 bits of
/// `code`, plus any error returned by `Node.init`.
fn addValue(self: *Self, code: u64, size: usize, value: valueType) !void {
    // `size` is only known at runtime, so the bits are walked here directly
    // rather than through a type parameterised on the size.
    if (size > @bitSizeOf(u64))
        return error.InvalidCodeLength;

    var cur_node: *Node = self.root;
    var remaining = size;
    while (remaining > 0) {
        remaining -= 1;
        const bit = @truncate(u1, code >> @intCast(u6, remaining));

        // Pick the child link this bit selects, then follow or create it.
        const slot = if (bit == 1) &cur_node.left else &cur_node.right;
        if (slot.*) |next_node| {
            cur_node = next_node;
        } else {
            const created = try Node.init(self.allocator);
            slot.* = created;
            cur_node = created;
        }
    }

    cur_node.value = value;
    // The tree may have grown; force `depth` to recompute.
    self.stored_depth = null;
}
/// Returns the depth of the graph, defined as the root node's depth minus
/// one. The result is cached in `stored_depth` and reused until that field
/// is cleared again.
fn depth(self: *Self) usize {
    return self.stored_depth orelse compute: {
        const fresh = self.root.depth() - 1;
        self.stored_depth = fresh;
        break :compute fresh;
    };
}
/// Advances the decoding cursor (`iterNode`) by one bit: 1 follows `left`,
/// 0 follows `right`.
///
/// Returns the value stored at the node that was reached and moves the
/// cursor back to the root, or returns null and leaves the cursor on that
/// node when it holds no value yet. When there is no child for `bit`,
/// returns `error.InvalidBitSequence` and the cursor is left unchanged.
fn iter(self: *Self, bit: u1) anyerror!?valueType {
    const current = self.iterNode;
    const child: ?*Node = if (bit == 1) current.left else current.right;
    const reached = child orelse return error.InvalidBitSequence;

    if (reached.value) |decoded| {
        // A complete code was matched; the next bit starts a new code.
        self.iterNode = self.root;
        return decoded;
    }

    self.iterNode = reached;
    return null;
}
fn deinit(self: *Self) void { fn deinit(self: *Self) void {
self.root.deinit(); self.root.deinit();
} }
@ -178,15 +283,17 @@ const DynamicDecoder = struct {
const Self = @This(); const Self = @This();
len_to_read: usize, len_to_read: usize,
codes: [19]u3, codes: [19]u64,
walker: *BitWalker, walker: *BitWalker,
allocator: mem.Allocator, allocator: mem.Allocator,
graph: HuffmanGraph(u64),
fn init(walker: *BitWalker, len_to_read: usize, allocator: mem.Allocator) !Self { fn init(walker: *BitWalker, len_to_read: usize, allocator: mem.Allocator) !Self {
const list: [19]u8 = .{ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 }; const list: [19]u8 = .{ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
var lenList: [19]u3 = .{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; var lenList: [19]u3 = .{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
var bl_count: [7]u8 = .{0, 0, 0, 0, 0, 0, 0}; var bl_count: [7]u3 = .{ 0, 0, 0, 0, 0, 0, 0 };
var max: u8 = 0;
for (0..(len_to_read + 4)) |i| { for (0..(len_to_read + 4)) |i| {
var data: u3 = @intCast(u3, try walker.walk(3)); var data: u3 = @intCast(u3, try walker.walk(3));
@ -195,36 +302,50 @@ const DynamicDecoder = struct {
continue; continue;
} }
bl_count[data - 1] += 1; bl_count[data - 1] += 1;
if (data > max) {
max = data;
}
} }
var bl_count: []u8 = allocator.allocator(u8, max - 1); var next_code: [19]u64 = .{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
var next_codes: [19]u3 = .{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; var code: u64 = 0;
var code = 0;
bl_count[0] = 0; bl_count[0] = 0;
for (1..max) |bits| { for (1..max) |bits| {
code = (code + bl_count[bits-1]) << 1; code = @shlExact(code + bl_count[bits - 1], 1);
next_code[bits] = code; next_code[bits] = code;
} }
var codes: [19]u3 = .{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; var codes: [19]u64 = .{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
for (0..19) |n| { for (0..19) |n| {
var len = lenList[n]; var len = lenList[n];
if (len != 0) { if (len != 0) {
codes = next_code[len]; codes[n] = next_code[len];
next_code[len] += 1; next_code[len] += 1;
} }
} }
var graph = try HuffmanGraph(u64).init(allocator);
for (0..19) |i| {
if (lenList[i] == 0)
continue;
try graph.addValue(codes[i], lenList[i], list[i]);
}
return .{ return .{
.len_to_read = len_to_read, .len_to_read = len_to_read,
.codes = codes, .codes = codes,
.walker = walker, .walker = walker,
.allocator = allocator, .allocator = allocator,
.graph = graph,
}; };
} }
/// Tears down the decoder by calling `deinit` on its `graph`. No other
/// field is touched.
fn deinit(self: *Self) void {
    const graph = &self.graph;
    graph.deinit();
}
}; };
const LOCAL_FILE_HEADER_SIGNATURE = 0x04034b50; const LOCAL_FILE_HEADER_SIGNATURE = 0x04034b50;
@ -326,7 +447,7 @@ const ZipFileHeader = struct {
print("number of dist codes: {}", .{number_of_dist_codes}); print("number of dist codes: {}", .{number_of_dist_codes});
print("number_of_length_codes: {}", .{number_of_length_codes}); print("number_of_length_codes: {}", .{number_of_length_codes});
var dynamic_decoder = try DynamicDecoder.init(&bitw, number_of_length_codes); var dynamic_decoder = try DynamicDecoder.init(&bitw, number_of_length_codes, self.allocator);
_ = dynamic_decoder; _ = dynamic_decoder;