const std = @import("std");
const math = std.math;
const mem = std.mem;
const Allocator = std.mem.Allocator;
const deflate_const = @import("deflate_const.zig");
const deflate = @import("compressor.zig");
const token = @import("token.zig");
const base_match_length = deflate_const.base_match_length;
const base_match_offset = deflate_const.base_match_offset;
const max_match_length = deflate_const.max_match_length;
const max_match_offset = deflate_const.max_match_offset;
const max_store_block_size = deflate_const.max_store_block_size;
// Number of bits used to index the match table.
const table_bits = 14;
// Mask applied to a hash value to obtain a valid table index.
const table_mask = table_size - 1;
// Right shift that keeps only the top `table_bits` bits of a 32-bit value.
const table_shift = 32 - table_bits;
// Number of entries in the match table.
const table_size = 1 << table_bits;
// Threshold for the encoder's running offset: once `cur` reaches this value
// the table offsets are shifted down, keeping the i32 offset away from its maximum.
const buffer_reset = math.maxInt(i32) - max_store_block_size * 2;
/// Reads four bytes of `b` starting at index `i` and assembles them into a
/// little-endian u32 (the byte at `i` becomes the least significant byte).
/// `i` must be non-negative and `i + 4` must not exceed `b.len`.
fn load32(b: []u8, i: i32) u32 {
    const start = @intCast(usize, i);
    const window = b[start .. start + 4];
    var value: u32 = 0;
    for (window) |byte, n| {
        // Byte n contributes bits [8n, 8n + 8).
        value |= @as(u32, byte) << @intCast(u5, 8 * n);
    }
    return value;
}
/// Reads eight bytes of `b` starting at index `i` and assembles them into a
/// little-endian u64 (the byte at `i` becomes the least significant byte).
/// `i` must be non-negative and `i + 8` must not exceed `b.len`.
fn load64(b: []u8, i: i32) u64 {
    const window = b[@intCast(usize, i)..@intCast(usize, i + 8)];
    var value: u64 = 0;
    for (window) |byte, n| {
        // Byte n contributes bits [8n, 8n + 8).
        value |= @as(u64, byte) << @intCast(u6, 8 * n);
    }
    return value;
}
/// Hashes a 32-bit value: multiplies it by a fixed odd constant with wrapping
/// arithmetic and keeps the top bits by shifting right by `table_shift`.
fn hash(u: u32) u32 {
    const multiplier: u32 = 0x1e35a7bd;
    const product = u *% multiplier;
    return product >> table_shift;
}
// Number of trailing input bytes in which `encode` does not start a new match
// search (it is subtracted from `src.len` to form the search limit).
const input_margin = 16 - 1;
// Inputs shorter than this are emitted by `encode` as literals only.
const min_non_literal_block_size = 1 + 1 + input_margin;
// One slot of the match table.
const TableEntry = struct {
    // The 4-byte value that was hashed into this slot.
    val: u32,
    // Offset at which `val` was seen, expressed relative to the encoder's
    // running `cur` counter (position in block + `cur` at insertion time).
    offset: i32,
};
/// Returns a DeflateFast with a zeroed match table, no history, and `cur`
/// starting at `max_store_block_size`. `prev` and `allocator` are left
/// undefined; `init` must be called before the value is used for encoding.
pub fn deflateFast() DeflateFast {
    const empty_entry = TableEntry{ .val = 0, .offset = 0 };
    return DeflateFast{
        .allocator = undefined,
        .prev = undefined,
        .prev_len = 0,
        .cur = max_store_block_size,
        .table = [1]TableEntry{empty_entry} ** table_size,
    };
}
// DeflateFast maintains the hash table used to find matches and a copy of
// the previous block so that matches can reach back across block boundaries.
pub const DeflateFast = struct {
    // Match table, indexed by `hash(value) & table_mask`.
    table: [table_size]TableEntry,
    // Copy of the previously encoded block.
    prev: []u8,
    // Number of valid bytes in `prev`; 0 means there is no usable history.
    prev_len: u32,
    // Running offset that corresponds to position 0 of the block being encoded.
    cur: i32,
    // Allocator that owns `prev`.
    allocator: Allocator,
    const Self = @This();
    // Stores the allocator and allocates the `prev` buffer
    // (`max_store_block_size` bytes). Fails if the allocation fails.
    pub fn init(self: *Self, allocator: Allocator) !void {
        self.allocator = allocator;
        self.prev = try allocator.alloc(u8, max_store_block_size);
        self.prev_len = 0;
    }
    // Frees the `prev` buffer allocated by `init`.
    pub fn deinit(self: *Self) void {
        self.allocator.free(self.prev);
        self.prev_len = 0;
    }
    // Encodes the block `src`, writing tokens into `dst` starting at index
    // `tokens_count.*` and advancing `tokens_count.*` past the last token written.
    pub fn encode(self: *Self, dst: []token.Token, tokens_count: *u16, src: []u8) void {
        // Make sure self.cur stays below the reset threshold before we add to it.
        if (self.cur >= buffer_reset) {
            self.shiftOffsets();
        }
        // Blocks that are too short to search are emitted as literals only.
        // The history is dropped and cur is advanced by a full block size.
        if (src.len < min_non_literal_block_size) {
            self.cur += max_store_block_size;
            self.prev_len = 0;
            emitLiteral(dst, tokens_count, src);
            return;
        }
        // s_limit is the last position at which we still look for a match;
        // the final input_margin bytes are only ever reached by extending a
        // match or emitted as literals.
        var s_limit = @intCast(i32, src.len - input_margin);
        // next_emit is the position in src from which the next emitLiteral starts.
        var next_emit: i32 = 0;
        var s: i32 = 0;
        var cv: u32 = load32(src, s);
        var next_hash: u32 = hash(cv);
        outer: while (true) {
            // Scan forward for a 4-byte match.
            //
            // `skip` controls how far we advance between hash lookups. It
            // starts at 32 and the stride is `skip >> 5`, so the first 32
            // lookups advance one byte at a time. `skip` grows by the stride
            // after every lookup, so the stride becomes 2 for the next 16
            // lookups, then 3, and so on: the longer we go without finding a
            // match, the faster we move through the input.
            //
            // Every lookup also records the current position in the table, so
            // later positions can match against it.
            //
            // The inner loop exits either with a candidate whose stored value
            // equals the 4 bytes at `s` and whose distance is within
            // max_match_offset, or by leaving the outer loop once the next
            // lookup position would pass s_limit.
            var skip: i32 = 32;
            var next_s: i32 = s;
            var candidate: TableEntry = undefined;
            while (true) {
                s = next_s;
                var bytes_between_hash_lookups = skip >> 5;
                next_s = s + bytes_between_hash_lookups;
                skip += bytes_between_hash_lookups;
                if (next_s > s_limit) {
                    break :outer;
                }
                candidate = self.table[next_hash & table_mask];
                var now = load32(src, next_s);
                self.table[next_hash & table_mask] = .{ .offset = s + self.cur, .val = cv };
                next_hash = hash(now);
                var offset = s - (candidate.offset - self.cur);
                if (offset > max_match_offset or cv != candidate.val) {
                    // Out of range or not the same 4 bytes: keep scanning.
                    cv = now;
                    continue;
                }
                break;
            }
            // A 4-byte match has been found at s. Everything in
            // src[next_emit..s] is unmatched, so emit it as literals first.
            emitLiteral(dst, tokens_count, src[@intCast(usize, next_emit)..@intCast(usize, s)]);
            // Emit match tokens for as long as each match is immediately
            // followed by another one.
            //
            // After emitting a match we probe the table once more at the new
            // position. If that probe hits, we loop again without going back
            // through the scanning loop above; otherwise we fall back to
            // scanning from the next byte.
            //
            // Invariant at the top of the loop: `candidate` describes a
            // 4-byte match at s, and no literals are pending before s.
            while (true) {
                // Skip the 4 bytes already known to match, then extend the
                // match as far as possible. `t` is the candidate position
                // relative to the start of src (negative means it lies in
                // the previous block).
                s += 4;
                var t = candidate.offset - self.cur + 4;
                var l = self.matchLen(s, t, src);
                // Emit the match as (length, distance), both stored relative to their base values.
                dst[tokens_count.*] = token.matchToken(
                    @intCast(u32, l + 4 - base_match_length),
                    @intCast(u32, s - t - base_match_offset),
                );
                tokens_count.* += 1;
                s += l;
                next_emit = s;
                if (s >= s_limit) {
                    break :outer;
                }
                // Load 8 bytes starting at s - 1 so that a single load gives
                // the 4-byte values at s - 1, s and s + 1.
                // First record position s - 1 in the table, then look up and
                // record position s. If s matches a previous occurrence we
                // stay in this loop; otherwise we resume scanning at s + 1
                // with cv/next_hash already prepared from the same load.
                var x = load64(src, s - 1);
                var prev_hash = hash(@truncate(u32, x));
                self.table[prev_hash & table_mask] = TableEntry{
                    .offset = self.cur + s - 1,
                    .val = @truncate(u32, x),
                };
                x >>= 8;
                var curr_hash = hash(@truncate(u32, x));
                candidate = self.table[curr_hash & table_mask];
                self.table[curr_hash & table_mask] = TableEntry{
                    .offset = self.cur + s,
                    .val = @truncate(u32, x),
                };
                var offset = s - (candidate.offset - self.cur);
                if (offset > max_match_offset or @truncate(u32, x) != candidate.val) {
                    cv = @truncate(u32, x >> 8);
                    next_hash = hash(cv);
                    s += 1;
                    break;
                }
            }
        }
        // Emit whatever is left after the last match as literals.
        if (@intCast(u32, next_emit) < src.len) {
            emitLiteral(dst, tokens_count, src[@intCast(usize, next_emit)..]);
        }
        // Advance the running offset and keep a copy of this block as history
        // for the next call.
        self.cur += @intCast(i32, src.len);
        self.prev_len = @intCast(u32, src.len);
        mem.copy(u8, self.prev[0..self.prev_len], src);
        return;
    }
    // Appends one literal token per byte of `lit` to `dst`, advancing `tokens_count.*`.
    fn emitLiteral(dst: []token.Token, tokens_count: *u16, lit: []u8) void {
        for (lit) |v| {
            dst[tokens_count.*] = token.literalToken(@intCast(u32, v));
            tokens_count.* += 1;
        }
        return;
    }
    // Returns the length of the match between src[s..] and the data at `t`.
    // A negative `t` means the match starts in self.prev, `-t` bytes before its end.
    // The result is capped at max_match_length - 4 and at the end of src.
    fn matchLen(self: *Self, s: i32, t: i32, src: []u8) i32 {
        var s1 = @intCast(u32, s) + max_match_length - 4;
        if (s1 > src.len) {
            s1 = @intCast(u32, src.len);
        }
        // The match starts inside the current block.
        if (t >= 0) {
            var b = src[@intCast(usize, t)..];
            var a = src[@intCast(usize, s)..@intCast(usize, s1)];
            b = b[0..a.len];
            // Extend the match for as long as the bytes agree.
            for (a) |_, i| {
                if (a[i] != b[i]) {
                    return @intCast(i32, i);
                }
            }
            return @intCast(i32, a.len);
        }
        // The match starts in the previous block; a start before its first byte yields 0.
        var tp = @intCast(i32, self.prev_len) + t;
        if (tp < 0) {
            return 0;
        }
        // Compare against the tail of the previous block first.
        var a = src[@intCast(usize, s)..@intCast(usize, s1)];
        var b = self.prev[@intCast(usize, tp)..@intCast(usize, self.prev_len)];
        if (b.len > a.len) {
            b = b[0..a.len];
        }
        a = a[0..b.len];
        for (b) |_, i| {
            if (a[i] != b[i]) {
                return @intCast(i32, i);
            }
        }
        // If the limit was reached, everything allowed matched inside the
        // previous block and we are done.
        var n = @intCast(i32, b.len);
        if (@intCast(u32, s + n) == s1) {
            return n;
        }
        // The previous block ran out: continue comparing against the start of the current block.
        a = src[@intCast(usize, s + n)..@intCast(usize, s1)];
        b = src[0..a.len];
        for (a) |_, i| {
            if (a[i] != b[i]) {
                return @intCast(i32, i) + n;
            }
        }
        return @intCast(i32, a.len) + n;
    }
    // Resets the encoding history so that no match can refer to data seen
    // before this call.
    pub fn reset(self: *Self) void {
        self.prev_len = 0;
        // Bump the offset so that every existing table entry fails the
        // distance check in encode.
        self.cur += max_match_offset;
        // Keep self.cur below the reset threshold.
        if (self.cur >= buffer_reset) {
            self.shiftOffsets();
        }
    }
    // Shifts all table offsets down and resets self.cur to
    // max_match_offset + 1.
    // Called when self.cur has reached buffer_reset, so that the running
    // offset does not keep growing towards the i32 maximum.
    fn shiftOffsets(self: *Self) void {
        if (self.prev_len == 0) {
            // There is no history, so the table can simply be cleared.
            for (self.table) |_, i| {
                self.table[i] = TableEntry{ .val = 0, .offset = 0 };
            }
            self.cur = max_match_offset + 1;
            return;
        }
        // Shift down every entry that is still within reach.
        for (self.table) |_, i| {
            var v = self.table[i].offset - self.cur + max_match_offset + 1;
            if (v < 0) {
                // self.cur becomes max_match_offset + 1, so every entry moves
                // down by (self.cur - (max_match_offset + 1)).
                // An entry that would become negative is already further away
                // than max_match_offset and can never match, so clamp it to 0.
                v = 0;
            }
            self.table[i].offset = v;
        }
        self.cur = max_match_offset + 1;
    }
};
// Exercises matchLen with small hand-written buffers, mostly with the match
// starting in the previous block (negative `t`).
test "best speed match 1/3" {
    const expect = std.testing.expect;
    {
        // Match runs through the end of `previous` and continues at the
        // start of `current` until the end of the input.
        var previous = [_]u8{ 0, 0, 0, 1, 2 };
        var e = DeflateFast{
            .prev = &previous,
            .prev_len = previous.len,
            .table = undefined,
            .allocator = undefined,
            .cur = 0,
        };
        var current = [_]u8{ 3, 4, 5, 0, 1, 2, 3, 4, 5 };
        var got: i32 = e.matchLen(3, -3, &current);
        try expect(got == 6);
    }
    {
        // Same as above, but the first byte of `current` differs, so the
        // match stops right after the part found in `previous`.
        var previous = [_]u8{ 0, 0, 0, 1, 2 };
        var e = DeflateFast{
            .prev = &previous,
            .prev_len = previous.len,
            .table = undefined,
            .allocator = undefined,
            .cur = 0,
        };
        var current = [_]u8{ 2, 4, 5, 0, 1, 2, 3, 4, 5 };
        var got: i32 = e.matchLen(3, -3, &current);
        try expect(got == 3);
    }
    {
        // Mismatch on the last byte of `previous`.
        var previous = [_]u8{ 0, 0, 0, 1, 1 };
        var e = DeflateFast{
            .prev = &previous,
            .prev_len = previous.len,
            .table = undefined,
            .allocator = undefined,
            .cur = 0,
        };
        var current = [_]u8{ 3, 4, 5, 0, 1, 2, 3, 4, 5 };
        var got: i32 = e.matchLen(3, -3, &current);
        try expect(got == 2);
    }
    {
        // One byte matched in `previous`, then an overlapping match within `current`.
        var previous = [_]u8{ 0, 0, 0, 1, 2 };
        var e = DeflateFast{
            .prev = &previous,
            .prev_len = previous.len,
            .table = undefined,
            .allocator = undefined,
            .cur = 0,
        };
        var current = [_]u8{ 2, 2, 2, 2, 1, 2, 3, 4, 5 };
        var got: i32 = e.matchLen(0, -1, &current);
        try expect(got == 4);
    }
    {
        // Match lies entirely inside `previous` and is cut off by the end of `current`.
        var previous = [_]u8{ 0, 0, 0, 1, 2, 3, 4, 5, 2, 2 };
        var e = DeflateFast{
            .prev = &previous,
            .prev_len = previous.len,
            .table = undefined,
            .allocator = undefined,
            .cur = 0,
        };
        var current = [_]u8{ 2, 2, 2, 2, 1, 2, 3, 4, 5 };
        var got: i32 = e.matchLen(4, -7, &current);
        try expect(got == 5);
    }
    {
        // First compared byte in `previous` already differs.
        var previous = [_]u8{ 9, 9, 9, 9, 9 };
        var e = DeflateFast{
            .prev = &previous,
            .prev_len = previous.len,
            .table = undefined,
            .allocator = undefined,
            .cur = 0,
        };
        var current = [_]u8{ 2, 2, 2, 2, 1, 2, 3, 4, 5 };
        var got: i32 = e.matchLen(0, -1, &current);
        try expect(got == 0);
    }
    {
        // Match start inside `current` (t >= 0) with an immediate mismatch.
        var previous = [_]u8{ 9, 9, 9, 9, 9 };
        var e = DeflateFast{
            .prev = &previous,
            .prev_len = previous.len,
            .table = undefined,
            .allocator = undefined,
            .cur = 0,
        };
        var current = [_]u8{ 9, 2, 2, 2, 1, 2, 3, 4, 5 };
        var got: i32 = e.matchLen(1, 0, &current);
        try expect(got == 0);
    }
}
// Exercises matchLen when the previous block is empty or exactly as long as
// the requested look-back.
test "best speed match 2/3" {
    const expect = std.testing.expect;
    {
        // Empty history: a negative `t` points before any available data.
        var previous = [_]u8{};
        var e = DeflateFast{
            .prev = &previous,
            .prev_len = previous.len,
            .table = undefined,
            .allocator = undefined,
            .cur = 0,
        };
        var current = [_]u8{ 9, 2, 2, 2, 1, 2, 3, 4, 5 };
        var got: i32 = e.matchLen(1, -5, &current);
        try expect(got == 0);
    }
    {
        // Empty history, look-back of a single byte.
        var previous = [_]u8{};
        var e = DeflateFast{
            .prev = &previous,
            .prev_len = previous.len,
            .table = undefined,
            .allocator = undefined,
            .cur = 0,
        };
        var current = [_]u8{ 9, 2, 2, 2, 1, 2, 3, 4, 5 };
        var got: i32 = e.matchLen(1, -1, &current);
        try expect(got == 0);
    }
    {
        // Empty history, overlapping match inside `current`.
        var previous = [_]u8{};
        var e = DeflateFast{
            .prev = &previous,
            .prev_len = previous.len,
            .table = undefined,
            .allocator = undefined,
            .cur = 0,
        };
        var current = [_]u8{ 2, 2, 2, 2, 1, 2, 3, 4, 5 };
        var got: i32 = e.matchLen(1, 0, &current);
        try expect(got == 3);
    }
    {
        // The whole of `current` matches the whole of `previous`.
        var previous = [_]u8{ 3, 4, 5 };
        var e = DeflateFast{
            .prev = &previous,
            .prev_len = previous.len,
            .table = undefined,
            .allocator = undefined,
            .cur = 0,
        };
        var current = [_]u8{ 3, 4, 5 };
        var got: i32 = e.matchLen(0, -3, &current);
        try expect(got == 3);
    }
}
// Table-driven matchLen checks on all-zero buffers. Every byte matches, so
// the result is bounded only by the end of `current` and by the
// max_match_length - 4 cap.
test "best speed match 3/3" {
    const testing = std.testing;
    const Case = struct {
        // Length of the previous block.
        previous: u32,
        // Length of the current block.
        current: u32,
        // Position in the current block where the match is extended from.
        s: i32,
        // Match start; negative values reach into the previous block.
        t: i32,
        // Expected matchLen result.
        expected: i32,
    };
    const cases = [_]Case{
        .{
            .previous = 1000,
            .current = 1000,
            .s = 0,
            .t = -1000,
            .expected = max_match_length - 4,
        },
        .{
            .previous = 200,
            .s = 0,
            .t = -200,
            .current = 500,
            .expected = max_match_length - 4,
        },
        .{
            .previous = 200,
            .s = 1,
            .t = 0,
            .current = 500,
            .expected = max_match_length - 4,
        },
        .{
            .previous = max_match_length - 4,
            .s = 0,
            .t = -(max_match_length - 4),
            .current = 500,
            .expected = max_match_length - 4,
        },
        .{
            .previous = 200,
            .s = 400,
            .t = -200,
            .current = 500,
            .expected = 100,
        },
        .{
            .previous = 10,
            .s = 400,
            .t = 200,
            .current = 500,
            .expected = 100,
        },
    };
    for (cases) |c| {
        const previous = try testing.allocator.alloc(u8, c.previous);
        defer testing.allocator.free(previous);
        mem.set(u8, previous, 0);
        const current = try testing.allocator.alloc(u8, c.current);
        defer testing.allocator.free(current);
        mem.set(u8, current, 0);
        var e = DeflateFast{
            .prev = previous,
            .prev_len = @intCast(u32, previous.len),
            .table = undefined,
            .allocator = undefined,
            .cur = 0,
        };
        const got: i32 = e.matchLen(c.s, c.t, current);
        // expectEqual reports both values when a case fails.
        try testing.expectEqual(c.expected, got);
    }
}
test "best speed shift offsets" {
    const testing = std.testing;
    const expect = std.testing.expect;
    // Checks that shiftOffsets keeps matches that are still in range and
    // drops matches that are out of range.
    var enc = deflateFast();
    try enc.init(testing.allocator);
    defer enc.deinit();
    // 32 bytes that are expected to produce no matches within a single block.
    var test_data = [32]u8{
        0xf5, 0x25, 0xf2, 0x55, 0xf6, 0xc1, 0x1f, 0x0b, 0x10, 0xa1,
        0xd0, 0x77, 0x56, 0x38, 0xf1, 0x9c, 0x7f, 0x85, 0xc5, 0xbd,
        0x16, 0x28, 0xd4, 0xf9, 0x03, 0xd4, 0xc0, 0xa1, 0x1e, 0x58,
        0x5b, 0xc9,
    };
    var tokens = [_]token.Token{0} ** 32;
    var tokens_count: u16 = 0;
    // Encode the data twice starting from a clean state. The second pass
    // should find matches in the first block and therefore need fewer tokens.
    tokens_count = 0;
    enc.encode(&tokens, &tokens_count, &test_data);
    var want_first_tokens = tokens_count;
    tokens_count = 0;
    enc.encode(&tokens, &tokens_count, &test_data);
    var want_second_tokens = tokens_count;
    try expect(want_first_tokens > want_second_tokens);
    // Move the running offset to one block before the reset threshold.
    enc.cur = buffer_reset - @intCast(i32, test_data.len);
    // First block before the threshold: must look like the clean first pass.
    tokens_count = 0;
    enc.encode(&tokens, &tokens_count, &test_data);
    var got = tokens_count;
    try expect(want_first_tokens == got);
    // The next encode call will trigger shiftOffsets.
    try expect(enc.cur == buffer_reset);
    // Second block crosses the threshold: must still match the clean second pass.
    tokens_count = 0;
    enc.encode(&tokens, &tokens_count, &test_data);
    got = tokens_count;
    try expect(want_second_tokens == got);
    // The running offset has been reset.
    try expect(enc.cur < buffer_reset);
    // Push the offset to the threshold and shift, leaving the old entries out of range.
    enc.cur = buffer_reset;
    enc.shiftOffsets();
    // No matches may be picked up now, so the token count equals the clean first pass.
    tokens_count = 0;
    enc.encode(&tokens, &tokens_count, &test_data);
    got = tokens_count;
    try expect(want_first_tokens == got);
}
test "best speed reset" {
    // Checks that the compressed output is the same whether or not the
    // encoder's running offset is close to the reset threshold when encoding starts.
    const expect = std.testing.expect;
    const fmt = std.fmt;
    const testing = std.testing;
    const ArrayList = std.ArrayList;
    const input_size = 65536;
    var input = try testing.allocator.alloc(u8, input_size);
    defer testing.allocator.free(input);
    var i: usize = 0;
    // NOTE(review): every iteration prints at the start of `input`, so only the
    // text of the last iteration remains and the rest of the buffer is never
    // written by this test — confirm this is intended.
    while (i < input_size) : (i += 1) {
        _ = try fmt.bufPrint(input, "asdfasdfasdfasdf{d}{d}fghfgujyut{d}yutyu\n", .{ i, i, i });
    }
    // The test targets the best_speed level only.
    const level = .best_speed;
    const offset: usize = 1;
    // Reference output: compress with a freshly created compressor.
    var want = ArrayList(u8).init(testing.allocator);
    defer want.deinit();
    var clean_comp = try deflate.compressor(
        testing.allocator,
        want.writer(),
        .{ .level = level },
    );
    defer clean_comp.deinit();
    // Write the input three times, then close.
    try clean_comp.writer().writeAll(input);
    try clean_comp.writer().writeAll(input);
    try clean_comp.writer().writeAll(input);
    try clean_comp.close();
    // Repeat with offsets 1, 2, 4, ..., 256.
    var o = offset;
    while (o <= 256) : (o *= 2) {
        var discard = ArrayList(u8).init(testing.allocator);
        defer discard.deinit();
        var comp = try deflate.compressor(
            testing.allocator,
            discard.writer(),
            .{ .level = level },
        );
        defer comp.deinit();
        // Reset repeatedly to get close to the reset threshold.
        // Presumably each compressor reset advances the encoder offset by max_match_offset — verify against compressor.zig.
        i = 0;
        var limit = (buffer_reset - input.len - o - max_match_offset) / max_match_offset;
        while (i < limit) : (i += 1) {
            // Output written during these resets is discarded.
            comp.reset(discard.writer());
        }
        var got = ArrayList(u8).init(testing.allocator);
        defer got.deinit();
        comp.reset(got.writer());
        // Write the input three times, then close.
        try comp.writer().writeAll(input);
        try comp.writer().writeAll(input);
        try comp.writer().writeAll(input);
        try comp.close();
        // The output must be identical to the reference output.
        try expect(mem.eql(u8, got.items, want.items));
    }
}