basic zig working

This commit is contained in:
Andre Henriques 2023-06-24 20:33:32 +01:00
parent 6264dd9e87
commit 8820f239b8
2 changed files with 443 additions and 195 deletions

View File

@ -80,6 +80,25 @@ fn HuffmanGraph(comptime valueType: type) type {
}
return d + 1;
}
/// Reports whether the subtree rooted at this node is well formed.
///
/// A node that carries a value must have no children; a node without a
/// value must have at least one child, and every child that exists must
/// itself be valid.
fn valid(self: *NodeSelf) bool {
    const is_leaf = self.right == null and self.left == null;

    // A value may only sit on a leaf.
    if (self.value != null) return is_leaf;

    // No value and no children: nothing can ever be decoded here.
    if (is_leaf) return false;

    // Right subtree is checked first, then left; a missing child is fine.
    const right_ok = if (self.right) |r| r.valid() else true;
    if (!right_ok) return false;

    const left_ok = if (self.left) |l| l.valid() else true;
    return left_ok;
}
};
root: *Node,
@ -100,14 +119,15 @@ fn HuffmanGraph(comptime valueType: type) type {
}
fn print(self: *Self) void {
for (0..(self.depth() + 1)) |i| {
var d = self.depth() + 1;
for (0..d) |i| {
self.root.print(0, i);
utils.printf("\n", .{});
}
}
fn addValue(self: *Self, code: u64, size: u8, value: valueType) !void {
var walker = try BitWalkerUint(u64, true).init(code, size);
fn addValue(self: *Self, code: u64, size: usize, value: valueType) !void {
var walker = try BitWalkerUint(u64).init(code, size, true);
var curNode: *Node = self.root;
@ -166,78 +186,206 @@ fn HuffmanGraph(comptime valueType: type) type {
return error.InvalidBitSequence;
}
/// Pulls single bits from `bitw` and feeds them to `iter` until a value
/// is produced, then returns that value.
///
/// Errors from `bitw.bitWalk()` and from `iter` are propagated.
fn nextBitW(self: *Self, bitw: *BitWalker) !valueType {
    while (true) {
        const bit = try bitw.bitWalk();
        // `iter` yields null while the walk has not produced a value yet.
        const decoded = (try self.iter(bit)) orelse continue;
        return decoded;
    }
}
/// Returns the result of `valid()` on the root node, i.e. whether the
/// whole tree passes the per-node validity check.
fn valid(self: *Self) bool {
return self.root.valid();
}
/// Tears the graph down by calling `deinit()` on the root node.
fn deinit(self: *Self) void {
self.root.deinit();
}
};
}
const DynamicDecoder = struct {
const Self = @This();
fn create_dynamic_graph(walker: *BitWalker, len_to_read: usize, allocator: mem.Allocator) !HuffmanGraph(u64) {
const list: [19]u8 = .{ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
len_to_read: usize,
codes: [19]u64,
walker: *BitWalker,
allocator: mem.Allocator,
graph: HuffmanGraph(u64),
const lens: []u8 = try allocator.alloc(u8, 19);
defer allocator.free(lens);
mem.set(u8, lens, 0);
fn init(walker: *BitWalker, len_to_read: usize, allocator: mem.Allocator) !Self {
const list: [19]u8 = .{ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
var lenList: [19]u3 = .{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
var bl_count: [7]u3 = .{ 0, 0, 0, 0, 0, 0, 0 };
var max: u8 = 0;
for (0..len_to_read) |i| {
var data: u3 = @intCast(u3, try walker.walk(3));
lenList[i] = data;
if (data == 0) {
continue;
}
bl_count[data] += 1;
if (data > max) {
max = data;
}
}
var next_code: [19]u64 = .{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
var code: u64 = 0;
for (1..(max + 1)) |bits| {
code = @shlExact(code + bl_count[bits - 1], 1);
next_code[bits] = code;
}
var codes: [19]u64 = .{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
for (0..19) |n| {
var len = lenList[n];
if (len != 0) {
codes[n] = next_code[len];
next_code[len] += 1;
}
}
var graph = try HuffmanGraph(u64).init(allocator);
for (0..19) |i| {
if (lenList[i] == 0)
continue;
try graph.addValue(codes[i], lenList[i], list[i]);
}
return .{
.len_to_read = len_to_read,
.codes = codes,
.walker = walker,
.allocator = allocator,
.graph = graph,
};
for (0..len_to_read) |i| {
lens[list[i]] = @intCast(u8, try walker.walk(3));
}
fn deinit(self: *Self) void {
self.graph.deinit();
var builder = try GraphBuilder(u64).init(allocator, 19, 0, 8);
defer builder.deinit();
while (!builder.done()) {
builder.addValue(builder.i, lens[builder.i]);
}
};
return try builder.build();
}
/// Returns a builder type that collects one code length per slot and then
/// assigns codes from those lengths (the `bl_count` / `next_code` scheme
/// of RFC 1951, section 3.2.2) to produce a `HuffmanGraph(T)`.
///
/// Typical use: `init`, then `addValue` / `skip` until `done()`, then
/// `build()`; or let `buildWithDynamic` drive the loop from a bit stream.
/// The builder owns its buffers; call `deinit` when finished. The graph
/// returned by `build` is owned by the caller.
fn GraphBuilder(comptime T: type) type {
    return struct {
        const Self = @This();

        allocator: mem.Allocator,
        /// Value attached to each slot; slots never written keep `default`.
        values: []T,
        /// Code length per slot; 0 means the slot receives no code.
        lens: []usize,
        /// How many slots were recorded with each code length.
        bl_count: []usize,
        /// Number of slots.
        size: usize,
        /// Largest code length recorded so far.
        max: usize = 0,
        /// Index of the next slot to fill.
        i: usize = 0,

        /// Allocates the builder's buffers.
        ///
        /// `size` is the number of slots, `default` the initial value of
        /// every slot, and `maxlen` the length of the per-length counter
        /// table (so every length passed to `addValue` must be `< maxlen`).
        /// On allocation failure nothing is leaked.
        pub fn init(allocator: mem.Allocator, size: usize, default: T, maxlen: usize) !Self {
            const values = try allocator.alloc(T, size);
            // Release earlier buffers if a later allocation fails.
            errdefer allocator.free(values);
            mem.set(T, values, default);

            const lens = try allocator.alloc(usize, size);
            errdefer allocator.free(lens);
            mem.set(usize, lens, 0);

            const bl_count = try allocator.alloc(usize, maxlen);
            mem.set(usize, bl_count, 0);

            return Self{
                .allocator = allocator,
                .bl_count = bl_count,
                .size = size,
                .values = values,
                .lens = lens,
            };
        }

        /// Advances the cursor by `len` slots, leaving them with length 0.
        pub fn skip(self: *Self, len: usize) void {
            self.i += len;
        }

        /// True once the cursor has reached or passed the last slot.
        pub fn done(self: *Self) bool {
            return self.i >= self.size;
        }

        /// Stores `value` with code length `len` in the current slot and
        /// advances the cursor.
        ///
        /// The caller must ensure `!done()` and that `len` is a valid
        /// index into the counter table sized by `maxlen` in `init`.
        pub fn addValue(self: *Self, value: T, len: usize) void {
            if (len > self.max) {
                self.max = len;
            }
            self.values[self.i] = value;
            self.lens[self.i] = len;
            self.bl_count[len] += 1;
            self.i += 1;
        }

        /// Assigns a code to every slot with a non-zero length and inserts
        /// it into a fresh graph.
        ///
        /// Returns `error.graph_not_valid` when the resulting graph fails
        /// its own `valid()` check; allocation errors and errors from
        /// `HuffmanGraph.addValue` are propagated.
        pub fn build(self: *Self) !HuffmanGraph(T) {
            // Length 0 means "unused" and must not take part in numbering.
            self.bl_count[0] = 0;

            const next_code = try self.allocator.alloc(usize, self.max + 1);
            defer self.allocator.free(next_code);
            std.mem.set(usize, next_code, 0);

            // First code of each length.
            var code: usize = 0;
            for (1..(self.max + 1)) |bits| {
                code = @shlExact(code + self.bl_count[bits - 1], 1);
                next_code[bits] = code;
            }

            const codes = try self.allocator.alloc(u64, self.size);
            defer self.allocator.free(codes);
            // `codes` holds u64, so it is zeroed with the matching element type.
            std.mem.set(u64, codes, 0);

            // Hand out consecutive codes within each length, in slot order.
            for (0..self.size) |n| {
                const len = self.lens[n];
                if (len != 0) {
                    codes[n] = next_code[len];
                    next_code[len] += 1;
                }
            }

            var graph = try HuffmanGraph(T).init(self.allocator);
            errdefer graph.deinit();

            for (0..self.size) |slot| {
                if (self.lens[slot] == 0)
                    continue;
                try graph.addValue(codes[slot], self.lens[slot], self.values[slot]);
            }

            if (!graph.valid())
                return error.graph_not_valid;

            return graph;
        }

        /// Fills the slots by decoding symbols from `bitw` with `graph`,
        /// then calls `build`.
        ///
        /// Symbols follow the code-length alphabet of RFC 1951, section
        /// 3.2.7:
        ///   * 16: repeat the previous length 3..6 times (2 extra bits);
        ///   * 17: leave 3..10 slots at length 0 (3 extra bits);
        ///   * 18: leave 11..138 slots at length 0 (7 extra bits);
        ///   * anything else is used directly as the length of the
        ///     current slot, whose value is its own index.
        ///
        /// Returns `error.invalid_repeat_code` when symbol 16 appears with
        /// no previous slot or would run past the last slot.
        /// NOTE(review): RFC 1951 lets a repeat run continue from the
        /// literal/length lengths into the distance lengths; with one
        /// builder per table such a run is reported as an error here.
        pub fn buildWithDynamic(self: *Self, bitw: *BitWalker, graph: *HuffmanGraph(u64)) !HuffmanGraph(T) {
            while (!self.done()) {
                const symbol = try graph.nextBitW(bitw);
                switch (symbol) {
                    16 => {
                        if (self.i == 0) return error.invalid_repeat_code;
                        const previous = self.lens[self.i - 1];
                        var remaining = try bitw.walk(2) + 3;
                        while (remaining > 0) : (remaining -= 1) {
                            if (self.done()) return error.invalid_repeat_code;
                            self.addValue(self.i, previous);
                        }
                    },
                    17 => self.skip(try bitw.walk(3) + 3),
                    18 => self.skip(try bitw.walk(7) + 11),
                    else => self.addValue(self.i, symbol),
                }
            }
            return try self.build();
        }

        /// Frees the builder's buffers. Graphs returned by `build` are not
        /// touched.
        pub fn deinit(self: *Self) void {
            self.allocator.free(self.values);
            self.allocator.free(self.lens);
            self.allocator.free(self.bl_count);
        }
    };
}
/// Converts a DEFLATE length symbol into the match length it stands for,
/// reading the symbol's extra bits from `bitw`.
///
/// Table from RFC 1951, section 3.2.5:
///   257..264 -> 3..10      (0 extra bits)
///   265..268 -> 11..18     (1 extra bit,  stride 2)
///   269..272 -> 19..34     (2 extra bits, stride 4)
///   273..276 -> 35..66     (3 extra bits, stride 8)
///   277..280 -> 67..130    (4 extra bits, stride 16)
///   281..284 -> 131..257   (5 extra bits, stride 32)
///   285      -> 258        (0 extra bits)
///
/// Returns `error.invalid_length_code` for any other symbol (286 and 287
/// can be produced by a stream but never encode a length) and propagates
/// errors from `bitw.walk`.
fn get_len_value(bitw: *BitWalker, len_code: usize) !usize {
    const extra_bits: usize = switch (len_code) {
        257...264 => 0,
        265...268 => 1,
        269...272 => 2,
        273...276 => 3,
        277...280 => 4,
        281...284 => 5,
        285 => 0,
        // The symbol comes from input data, so this must not be `unreachable`.
        else => return error.invalid_length_code,
    };

    // Each group's stride is 2^extra_bits, so consecutive symbols cover
    // adjacent, non-overlapping length ranges.
    const base_value: usize = switch (len_code) {
        257...264 => 3 + (len_code - 257),
        265...268 => 11 + (len_code - 265) * 2,
        269...272 => 19 + (len_code - 269) * 4,
        273...276 => 35 + (len_code - 273) * 8,
        277...280 => 67 + (len_code - 277) * 16,
        281...284 => 131 + (len_code - 281) * 32,
        285 => 258,
        else => unreachable, // rejected by the switch above
    };

    const to_add = try bitw.walk(extra_bits);
    return base_value + @intCast(usize, to_add);
}
/// Decodes one distance symbol with `dist_graph` and converts it into the
/// back-reference distance, reading the symbol's extra bits from `bitw`.
///
/// Table from RFC 1951, section 3.2.5: symbols 0..3 map to distances 1..4
/// with no extra bits; symbol `c` in 4..29 has `c / 2 - 1` extra bits and
/// base `(2 + c % 2) * 2^(c / 2 - 1) + 1` (5, 7, 9, 13, 17, 25, ...).
///
/// Returns `error.invalid_distance_code` for symbols above 29 and
/// propagates errors from the graph and from `bitw.walk`.
fn get_dist_value(bitw: *BitWalker, dist_graph: *HuffmanGraph(u64)) !usize {
    const decoded = try dist_graph.nextBitW(bitw);
    // The symbol comes from input data, so out-of-range values are
    // reported instead of being treated as `unreachable`.
    if (decoded > 29) return error.invalid_distance_code;
    const code = @intCast(usize, decoded);

    const extra_bits: usize = if (code < 4) 0 else code / 2 - 1;
    const base_value: usize = if (code < 4)
        code + 1
    else
        (2 + code % 2) * std.math.pow(usize, 2, extra_bits) + 1;

    const to_add = try bitw.walk(extra_bits);
    return base_value + @intCast(usize, to_add);
}
const BlockData = struct {
const Self = @This();
@ -248,63 +396,157 @@ const BlockData = struct {
last: bool,
blockType: u8,
fn init(allocator: mem.Allocator, bitw: *BitWalker) !Self {
literal_graph: ?HuffmanGraph(u64) = null,
dist_graph: ?HuffmanGraph(u64) = null,
output: *[]u8,
fn init(allocator: mem.Allocator, bitw: *BitWalker, output: *[]u8) !Self {
return Self{
.bitw = bitw,
.allocator = allocator,
.last = try bitw.walk(1) == 1,
.blockType = try bitw.walk(2),
.blockType = @intCast(u8, try bitw.walk(2)),
.output = output,
};
}
fn uncompress(self: *Self) !void {
if (self.blockType != 2) {
return error.unsuported_block_type;
}
try self.dynamic_huffman();
fn uncompress(self: *Self, start_place: usize) !usize {
return switch (self.blockType) {
1 => blk: {
try self.fixed();
break :blk 0;
},
2 => self.dynamic_huffman(start_place),
3 => error.block_type_error,
else => unreachable,
};
}
fn dynamic_huffman(self: *Self) !void {
fn fixed(self: *Self) !void {
var litBuilder = try GraphBuilder(u64).init(self.allocator, 287, 0, 10);
defer litBuilder.deinit();
for (0..144) |i| {
//try lit.addValue(0b00110000 + i, 8, i);
litBuilder.addValue(i, 8);
}
for (144..256) |i| {
//try lit.addValue(0b001100000 + i, 9, i);
litBuilder.addValue(i, 9);
}
for (256..280) |i| {
//try lit.addValue(0b0000000 + i, 7, i);
litBuilder.addValue(i, 7);
}
for (280..287) |i| {
litBuilder.addValue(i, 8);
//try lit.addValue(0b11000000 + i, 8, i);
}
var lit = try litBuilder.build();
self.literal_graph = lit;
lit.print();
// var dist = try HuffmanGraph(u64).init(self.allocator);
// for (0..144) |i| {
// try lit.addValue(0b00110000 + i, 8, i);
// }
// for (144..256) |i| {
// try lit.addValue(0b001100000 + i, 9, i);
// }
// for (256..280) |i| {
// try lit.addValue(0b0000000 + i, 7, i);
// }
// for (280..287) |i| {
// try lit.addValue(0b11000000 + i, 8, i);
// }
// self.literal_graph = lit;
//TODO dist
var bitw = self.bitw;
var number_of_literal_codes: u32 = @as(u32, try bitw.walk(5)) + 257;
var number_of_dist_codes = try bitw.walk(5) + 1;
var number_of_length_codes = try bitw.walk(4) + 4;
print("number of literal codes: {}", .{number_of_literal_codes});
print("number of dist codes: {}", .{number_of_dist_codes});
print("number_of_length_codes: {}", .{number_of_length_codes});
var dynamic_decoder = try DynamicDecoder.init(bitw, number_of_length_codes, self.allocator);
defer dynamic_decoder.deinit();
dynamic_decoder.graph.print();
var code_len: usize = 0;
while (code_len < number_of_literal_codes) {
var decode_value = try dynamic_decoder.graph.iter(try bitw.bitWalk());
while (decode_value == null) {
decode_value = try dynamic_decoder.graph.iter(try bitw.bitWalk());
}
print("Test {any}", .{decode_value});
if (decode_value.? == 16) {
while (true) {
var lastRead = try lit.nextBitW(bitw);
if (lastRead == 256) {
break;
} else if (lastRead > 256) {
utils.printf("❓({})", .{lastRead});
//var dist = try self.dist_graph.?.nextBitW(bitw);
//utils.printf("<{}>", .{dist});
return error.not_implemented;
} else if (decode_value.? == 17) {
code_len += try bitw.walk(3);
} else if (decode_value.? == 18) {
code_len += try bitw.walk(7);
} else if (lastRead < 256) {
utils.printf("{c}", .{@intCast(u8, lastRead)});
} else {
code_len += 1;
unreachable;
}
}
return error.todo;
}
fn read_len_code() void {}
/// Decodes one dynamic-Huffman block into `self.output`, starting at
/// index `start_place`, and returns the index one past the last byte
/// written.
///
/// Steps: read the three table sizes, build the code-length graph, use
/// it to build the literal/length and distance graphs (stored in
/// `self.literal_graph` / `self.dist_graph` and released by `deinit`),
/// then decode symbols until the end-of-block symbol 256.
///
/// Errors: `error.output_overflow` when the block would write past the
/// end of the output slice, `error.invalid_distance` when a
/// back-reference points before the start of the output, plus anything
/// propagated from the bit walker and the graph builders.
fn dynamic_huffman(self: *Self, start_place: usize) !usize {
    const bitw = self.bitw;

    const number_of_literal_codes: u32 = @intCast(u32, try bitw.walk(5)) + 257;
    const number_of_dist_codes = try bitw.walk(5) + 1;
    const number_of_length_codes = try bitw.walk(4) + 4;

    var dynamic_graph = try create_dynamic_graph(bitw, number_of_length_codes, self.allocator);
    defer dynamic_graph.deinit();

    // Each builder lives in its own scope so it is released exactly once,
    // whichever step fails.
    {
        var lit_builder = try GraphBuilder(u64).init(self.allocator, number_of_literal_codes, 0, 16);
        defer lit_builder.deinit();
        self.literal_graph = try lit_builder.buildWithDynamic(bitw, &dynamic_graph);
    }
    {
        var dist_builder = try GraphBuilder(u64).init(self.allocator, number_of_dist_codes, 0, 16);
        defer dist_builder.deinit();
        self.dist_graph = try dist_builder.buildWithDynamic(bitw, &dynamic_graph);
    }

    const lit_graph = &self.literal_graph.?;
    const dist_graph = &self.dist_graph.?;
    const out = self.output.*;

    var i: usize = start_place;
    while (true) {
        const symbol = try lit_graph.nextBitW(bitw);

        // 256 marks the end of the block.
        if (symbol == 256) break;

        if (symbol < 256) {
            // Plain literal byte.
            if (i >= out.len) return error.output_overflow;
            out[i] = @intCast(u8, symbol);
            i += 1;
            continue;
        }

        // Length/distance pair: copy `len` bytes from `dist` bytes back.
        const len = try get_len_value(bitw, @intCast(usize, symbol));
        const dist = try get_dist_value(bitw, dist_graph);
        if (dist > i) return error.invalid_distance;
        if (i > out.len or len > out.len - i) return error.output_overflow;

        // Byte-by-byte forward copy: source and destination may overlap
        // when `dist < len`, which repeats the most recent bytes.
        var src: usize = i - dist;
        const end = i + len;
        while (i < end) : ({
            i += 1;
            src += 1;
        }) {
            out[i] = out[src];
        }
    }

    return i;
}
/// Releases whichever of the literal/length and distance graphs were
/// built for this block; graphs that are still null are left alone.
fn deinit(self: *Self) void {
    if (self.literal_graph) |*literal| literal.deinit();
    if (self.dist_graph) |*dist| dist.deinit();
}
};
const LOCAL_FILE_HEADER_SIGNATURE = 0x04034b50;
@ -366,10 +608,10 @@ const ZipFileHeader = struct {
}
fn extract(self: *Self) !void {
if (self.decompressed) {
return error.AlreadyDecompressed;
}
// already decompressed
if (self.decompressed) return;
// already decompressed
if (self.compression_method == 0) {
return error.uncompressed_file;
}
@ -380,19 +622,22 @@ const ZipFileHeader = struct {
self.uncompressed_content = try self.allocator.alloc(u8, self.uncompressed_size);
errdefer self.allocator.free(self.uncompressed_content);
mem.set(u8, self.uncompressed_content, 0);
var bitw = BitWalker.init(&self.compressed_content, false);
var bitw = try BitWalker.init(&self.compressed_content);
var block = try BlockData.init(self.allocator, &bitw);
var is_last = false;
var output_place: usize = 0;
if (block.last) {
print("last block", .{});
} else {
print("not last block", .{});
while (!is_last) {
var block = try BlockData.init(self.allocator, &bitw, &self.uncompressed_content);
defer block.deinit();
is_last = block.last;
output_place = try block.uncompress(output_place);
}
try block.uncompress();
self.decompressed = true;
}
@ -460,14 +705,12 @@ pub fn main() !void {
exit("Invalid file provided", .{}, 1);
}
print("H: {}", .{first_file.compression_method});
while (true) {
var second_file = try ZipFileHeader.init(allocator, reader);
defer second_file.deinit();
var second_file = try ZipFileHeader.init(allocator, reader);
defer second_file.deinit();
try second_file.extract();
print("G: {s}", .{second_file.file_name});
print("GI: {}", .{second_file.compression_method});
print("xml stuff:\n{s}", .{second_file.compressed_content});
try second_file.extract();
print("G: {s}", .{second_file.file_name});
print("xml stuff:\n{s}", .{second_file.uncompressed_content});
}
}

View File

@ -1,7 +1,7 @@
const std = @import("std");
const utils = @import("utils.zig");
pub fn BitWalkerUint(comptime T: anytype, comptime reverse: bool) type {
pub fn BitWalkerUint(comptime T: anytype) type {
const typeInfo = @typeInfo(T);
if (typeInfo != .Int) {
@ -18,15 +18,16 @@ pub fn BitWalkerUint(comptime T: anytype, comptime reverse: bool) type {
// TODO this is probably wrong
in_byte_position: i16,
size: u8,
size: usize,
reverse: bool,
const Self = @This();
pub fn init(value: T, size: u8) !Self {
pub fn init(value: T, size: usize, reverse: bool) !Self {
if (typeInfo.Int.bits < size)
return error.invlaid_size;
var start_value: u8 = 0;
var start_value: usize = 0;
if (reverse) {
start_value = size - 1;
}
@ -39,9 +40,10 @@ pub fn BitWalkerUint(comptime T: anytype, comptime reverse: bool) type {
return Self{
.value = value,
.in_byte_position = start_value,
.in_byte_position = @intCast(i16, start_value),
.size = size,
.mask = mask,
.reverse = reverse,
};
}
@ -51,7 +53,9 @@ pub fn BitWalkerUint(comptime T: anytype, comptime reverse: bool) type {
var result = (self.value & self.mask) == self.mask;
if (reverse) {
//utils.print("walkbit: {b:08} & {b:08} -> {b:08} ({})", .{ self.value, self.mask, self.value & self.mask, result });
if (self.reverse) {
self.in_byte_position -= 1;
if (self.mask == 1) {
self.mask = 0;
@ -60,11 +64,10 @@ pub fn BitWalkerUint(comptime T: anytype, comptime reverse: bool) type {
}
} else {
self.in_byte_position += 1;
self.mask = @shlExact(self.mask, 1);
if (self.in_byte_position > self.size) {
if (self.in_byte_position >= self.size) {
self.mask = 0;
} else {
self.mask = @shrExact(self.mask, 1);
self.mask = @shlExact(self.mask, 1);
}
}
@ -77,77 +80,79 @@ pub const BitWalker = struct {
const Self = @This();
data: *[]u8,
position: usize = 0,
in_byte_position: u3 = 0,
direction: bool = false,
pub fn init(data: *[]u8, direction: bool) Self {
// True == most Significant
// False == least Significant
direction: bool = false,
unitWaker: BitWalkerUint(u8),
position: usize,
pub fn init(data: *[]u8) !Self {
return Self{
.data = data,
.direction = direction,
.position = 0,
.unitWaker = try BitWalkerUint(u8).init(data.ptr[0], 8, false),
};
}
/// Flips `direction` and restarts the per-byte walker in the new
/// direction on the byte at `position`.
///
/// When the current byte is already exhausted (walker mask is 0) the
/// position is advanced first, so the restart happens on the next byte.
/// NOTE(review): the byte is read through `data.ptr[...]` without a
/// length check — confirm callers never flip direction on the last byte.
pub fn change_direction(self: *Self) !void {
self.direction = !self.direction;
if (self.unitWaker.mask == 0)
self.position += 1;
// TODO probs wrong when mask != 0
// (restarting with a full 8-bit walker discards how far into the
// current byte the previous walker had advanced)
self.unitWaker = try BitWalkerUint(u8).init(self.data.ptr[self.position], 8, self.direction);
}
/// Flips `direction` and restarts the per-byte walker in the new
/// direction.
///
/// If the current byte is already exhausted (walker mask is 0) the
/// position moves to the next byte first; otherwise the walker restarts
/// on the current byte, whichever direction is now selected.
pub fn smart_change_direction(self: *Self) !void {
    self.direction = !self.direction;

    // Exhausted byte: continue on the following one.
    if (self.unitWaker.mask == 0) self.position += 1;

    self.unitWaker = try BitWalkerUint(u8).init(self.data.ptr[self.position], 8, self.direction);
}
/// Moves to the next input byte and starts a fresh 8-bit walker on it in
/// the current direction.
///
/// Returns `error.end_of_data` when there is no next byte; in that case
/// `position` and the current walker are left unchanged.
pub fn nextByte(self: *Self) !void {
    const next = self.position + 1;
    // `data.ptr[...]` carries no bounds check, so the length is verified
    // here before touching the byte.
    if (next >= self.data.len) return error.end_of_data;

    self.position = next;
    self.unitWaker = try BitWalkerUint(u8).init(self.data.ptr[next], 8, self.direction);
}
/// Debug helper: prints the byte position, the walker's in-byte position
/// and the walker's mask (as 8 binary digits) through `utils.print`.
pub fn status(self: *Self) void {
utils.print("p: {}, in: {} ({b:08})", .{ self.position, self.unitWaker.in_byte_position, self.unitWaker.mask });
}
/// Reads `bits` bits from the stream and returns them packed in a u64.
///
/// With `direction` set, each new bit is shifted in from the right (the
/// first bit read ends up most significant); otherwise the k-th bit read
/// lands at bit position k (the first bit read is least significant).
/// When the current byte runs out, the walker moves to the next byte and
/// keeps going. `walk(0)` reads nothing and returns 0.
pub fn walk(self: *Self, bits: usize) !u64 {
    var result: u64 = 0;
    var taken: usize = 0;

    while (taken < bits) {
        // A null bit means the current byte is used up: advance and retry
        // without counting this round.
        const bit = self.unitWaker.walkBit() orelse {
            try self.nextByte();
            continue;
        };

        if (self.direction) {
            result = @shlExact(result, 1);
            result += bit;
        } else {
            result += @shlExact(@as(u64, bit), @intCast(u6, taken));
        }
        taken += 1;
    }

    return result;
}
/// Reads a single bit via `walk(1)` and returns it as a `u1`.
pub fn bitWalk(self: *Self) !u1 {
return @intCast(u1, try self.walk(1));
}
// TODO direction
pub fn walk(self: *Self, bits: u3) !u8 {
if (bits > 8 or bits == 0) return error.invalid_bit_number;
var byte = self.data.ptr[self.position];
// jumps over bytes
if (self.in_byte_position + @as(u4, bits) > 8) {
// Generate a mast that covers the last part of the old byte
var old_mask: u8 = 0;
var i: usize = 0;
while (i < 8 - @as(u4, self.in_byte_position)) : (i += 1) {
old_mask = @shlExact(old_mask, 1) + 1;
}
old_mask = @shlExact(old_mask, self.in_byte_position);
var next_byte = self.data.ptr[self.position + 1];
var new_byte_pos: u3 = @intCast(u3, @as(u4, bits) - (8 - @as(u4, self.in_byte_position)));
var new_mask: u8 = 0;
var j: usize = 0;
while (j < new_byte_pos) : (j += 1) {
new_mask = @shlExact(new_mask, 1) + 1;
}
var result = @shrExact(byte & old_mask, self.in_byte_position) + @shlExact(next_byte & new_mask, @intCast(u3, 8 - @as(u4, self.in_byte_position)));
//print("mask: {b}, new_mask: {b}", .{ old_mask, new_mask });
//print("here {b} {b}", .{ byte, old_mask });
//print("here_new {b} {b}", .{ next_byte, new_mask });
//print("result {}", .{result});
self.position += 1;
self.in_byte_position = new_byte_pos;
return result;
}
// Generate a mast that covers the last part of the old byte
var old_mask: u8 = 0;
var i: usize = 0;
while (i < bits) : (i += 1) {
old_mask = @shlExact(old_mask, 1) + 1;
}
old_mask = @shlExact(old_mask, self.in_byte_position);
const result = @shrExact(byte & old_mask, self.in_byte_position);
const sum = @intCast(u4, self.in_byte_position) + @intCast(u4, bits);
if (sum == 8) {
self.position += 1;
self.in_byte_position = 0;
} else {
self.in_byte_position += bits;
}
return result;
}
};