zls/src/offsets.zig

const std = @import("std");
const types = @import("types.zig");

/// Unit in which the functions in this file measure character offsets,
/// columns and lengths.
pub const Encoding = enum {
    /// Offsets are counted in UTF-8 bytes.
    utf8,
    /// Offsets are counted in UTF-16 code units: code points below 0x10000
    /// count as one unit, all others as two.
    utf16,
};

/// Result of resolving a `types.Position` against a document's text,
/// as produced by `documentPosition`.
pub const DocumentPosition = struct {
    /// The requested line, obtained by splitting the document text on "\n".
    line: []const u8,
    /// Byte offset of the position, measured from the start of `line`.
    line_index: usize,
    /// Byte offset of the position, measured from the start of the document text.
    absolute_index: usize,
};

/// Resolves `position` (zero-based line, character offset within that line)
/// to byte offsets inside `doc.text`. Lines are separated by "\n".
///
/// With `.utf8`, `position.character` is a byte offset from the start of the
/// line. With `.utf16` it is a number of UTF-16 code units, which is converted
/// by decoding the UTF-8 text that starts at the line. In both cases the
/// offset is validated against the end of the document, not the end of the
/// line. A non-positive `.utf16` character resolves to the start of the line.
///
/// Errors:
/// - `error.InvalidParams` if the line does not exist, if the offset runs past
///   the end of the document, or if a `.utf8` character offset is negative.
/// - A `std.unicode` UTF-8 error if the text cannot be decoded while walking
///   a `.utf16` offset.
pub fn documentPosition(doc: types.TextDocument, position: types.Position, encoding: Encoding) !DocumentPosition {
    var split_iterator = std.mem.split(doc.text, "\n");

    // Skip the lines that precede the requested one.
    var line_idx: i64 = 0;
    while (line_idx < position.line) : (line_idx += 1) {
        _ = split_iterator.next() orelse return error.InvalidParams;
    }

    // `index` is null once the iterator is exhausted, which happens when
    // `position.line` is exactly one past the last line. Report that as an
    // invalid position instead of unwrapping null.
    const line_start_idx = split_iterator.index orelse return error.InvalidParams;
    const line = split_iterator.next() orelse return error.InvalidParams;

    if (encoding == .utf8) {
        // A negative offset cannot be stored in the `usize` result fields.
        if (position.character < 0) {
            return error.InvalidParams;
        }
        const index = @intCast(i64, line_start_idx) + position.character;
        if (index > @intCast(i64, doc.text.len)) {
            return error.InvalidParams;
        }
        return DocumentPosition{
            .line = line,
            .absolute_index = @intCast(usize, index),
            .line_index = @intCast(usize, position.character),
        };
    }

    // utf16: advance one code point at a time, counting UTF-16 code units,
    // until the requested number of units has been consumed.
    const utf8 = doc.text[line_start_idx..];
    var utf8_idx: usize = 0;
    var utf16_idx: usize = 0;
    while (utf16_idx < position.character) {
        // Nothing left to decode: the offset lies beyond the document.
        if (utf8_idx >= utf8.len) {
            return error.InvalidParams;
        }

        const n = try std.unicode.utf8ByteSequenceLength(utf8[utf8_idx]);
        const next_utf8_idx = utf8_idx + n;
        // The lead byte announces more bytes than the document contains.
        if (next_utf8_idx > utf8.len) {
            return error.InvalidParams;
        }
        const codepoint = try std.unicode.utf8Decode(utf8[utf8_idx..next_utf8_idx]);
        // Code points at or above 0x10000 take two UTF-16 code units.
        if (codepoint < 0x10000) {
            utf16_idx += 1;
        } else {
            utf16_idx += 2;
        }
        utf8_idx = next_utf8_idx;
    }
    return DocumentPosition{
        .line = line,
        .absolute_index = line_start_idx + utf8_idx,
        .line_index = utf8_idx,
    };
}

/// Location of a token: a line/column pair plus a byte offset.
pub const TokenLocation = struct {
    line: usize,
    column: usize,
    offset: usize,

    /// Combines `lhs` with a location `rhs` that was measured relative to it.
    /// Line counts are summed. The columns are summed only when `rhs` stays on
    /// the same line (`rhs.line == 0`); otherwise `rhs.column` is used as is.
    /// The resulting offset is always `rhs.offset`.
    pub fn add(lhs: TokenLocation, rhs: TokenLocation) TokenLocation {
        var sum = TokenLocation{
            .line = lhs.line + rhs.line,
            .column = rhs.column,
            .offset = rhs.offset,
        };
        if (rhs.line == 0) {
            sum.column = lhs.column + rhs.column;
        }
        return sum;
    }
};

/// Computes where `token` starts relative to the byte offset `start_index`
/// of `tree.source`.
///
/// The result holds the number of "\n" bytes crossed (`line`), the column
/// reached since the last newline, or since `start_index` if none was crossed
/// (`column`), and the absolute byte offset reached in `tree.source`
/// (`offset`). Columns are counted in bytes for `.utf8` and in UTF-16 code
/// units for `.utf16`.
///
/// Returns a `std.unicode` UTF-8 error if, with `.utf16`, the scanned text is
/// not valid UTF-8; this includes a multi-byte sequence cut off by the end of
/// the source.
///
/// NOTE(review): `start_index` is expected to be <= the token's start offset;
/// the subtraction below overflows otherwise.
pub fn tokenRelativeLocation(tree: *std.zig.ast.Tree, start_index: usize, token: std.zig.ast.TokenIndex, encoding: Encoding) !TokenLocation {
    const token_start = tree.token_locs[token].start;
    const source = tree.source[start_index..];
    // Number of bytes between `start_index` and the token (loop invariant).
    const distance = token_start - start_index;

    var loc = TokenLocation{
        .line = 0,
        .column = 0,
        .offset = 0,
    };
    var i: usize = 0;
    while (i < distance) {
        const c = source[i];
        if (c == '\n') {
            loc.line += 1;
            loc.column = 0;
            i += 1;
            continue;
        }

        if (encoding == .utf16) {
            const n = try std.unicode.utf8ByteSequenceLength(c);
            // The lead byte announces more bytes than the source contains;
            // report it instead of slicing out of bounds.
            if (i + n > source.len) {
                return error.Utf8ExpectedContinuation;
            }
            const codepoint = try std.unicode.utf8Decode(source[i .. i + n]);
            // Code points at or above 0x10000 take two UTF-16 code units.
            if (codepoint < 0x10000) {
                loc.column += 1;
            } else {
                loc.column += 2;
            }
            i += n;
        } else {
            loc.column += 1;
            i += 1;
        }
    }
    loc.offset = i + start_index;
    return loc;
}

/// Returns the length of `token` in the unit selected by `encoding`:
/// bytes for `.utf8`, UTF-16 code units for `.utf16`.
/// Asserts the token is comprised of valid utf8
pub fn tokenLength(tree: *std.zig.ast.Tree, token: std.zig.ast.TokenIndex, encoding: Encoding) usize {
    const span = tree.token_locs[token];
    switch (encoding) {
        .utf8 => return span.end - span.start,
        .utf16 => {},
    }

    // Walk the token one code point at a time, counting UTF-16 code units.
    var code_units: usize = 0;
    var pos: usize = span.start;
    while (pos < span.end) {
        const seq_len = std.unicode.utf8ByteSequenceLength(tree.source[pos]) catch unreachable;
        const next_pos = pos + seq_len;
        const codepoint = std.unicode.utf8Decode(tree.source[pos..next_pos]) catch unreachable;
        // Every code point takes one unit; those at or above 0x10000 take a second.
        code_units += 1;
        if (codepoint >= 0x10000) {
            code_units += 1;
        }
        pos = next_pos;
    }
    return code_units;
}

/// Returns the range that spans the whole of `doc.text`: from line 0,
/// character 0 up to the end of the last line. Lines are separated by "\n".
///
/// The end character is the length of the last line, in bytes for `.utf8` or
/// in UTF-16 code units for `.utf16`.
///
/// Returns a `std.unicode` UTF-8 error if, with `.utf16`, the last line is not
/// valid UTF-8; this includes a multi-byte sequence cut off by the end of the
/// text.
pub fn documentRange(doc: types.TextDocument, encoding: Encoding) !types.Range {
    var line_idx: i64 = 0;
    var curr_line: []const u8 = doc.text;

    var split_iterator = std.mem.split(doc.text, "\n");
    while (split_iterator.next()) |line| : (line_idx += 1) {
        curr_line = line;
    }

    // The loop leaves `line_idx` at the number of lines; the end position
    // needs the zero-based index of the last line.
    if (line_idx > 0) line_idx -= 1;

    var end_character: usize = curr_line.len;
    if (encoding != .utf8) {
        // Re-measure the last line in UTF-16 code units.
        end_character = 0;
        var line_utf8_idx: usize = 0;
        while (line_utf8_idx < curr_line.len) {
            const n = try std.unicode.utf8ByteSequenceLength(curr_line[line_utf8_idx]);
            // The lead byte announces more bytes than the line contains;
            // report it instead of slicing out of bounds.
            if (line_utf8_idx + n > curr_line.len) {
                return error.Utf8ExpectedContinuation;
            }
            const codepoint = try std.unicode.utf8Decode(curr_line[line_utf8_idx .. line_utf8_idx + n]);
            // Code points at or above 0x10000 take two UTF-16 code units.
            if (codepoint < 0x10000) {
                end_character += 1;
            } else {
                end_character += 2;
            }
            line_utf8_idx += n;
        }
    }

    return types.Range{
        .start = .{
            .line = 0,
            .character = 0,
        },
        .end = .{
            .line = line_idx,
            .character = @intCast(i64, end_character),
        },
    };
}
Add offsets file 2020-07-03 00:33:10 +01:00			`const std = @import("std");`
			`const types = @import("types.zig");`

			`pub const Encoding = enum {`
			`utf8,`
			`utf16,`
			`};`

			`pub const DocumentPosition = struct {`
			`line: []const u8,`
			`line_index: usize,`
			`absolute_index: usize,`
			`};`

			`pub fn documentPosition(doc: types.TextDocument, position: types.Position, encoding: Encoding) !DocumentPosition {`
			`var split_iterator = std.mem.split(doc.text, "\n");`

			`var line_idx: i64 = 0;`
			`var line: []const u8 = "";`
			`while (line_idx < position.line) : (line_idx += 1) {`
			`line = split_iterator.next() orelse return error.InvalidParams;`
			`}`

			`const line_start_idx = split_iterator.index.?;`
			`line = split_iterator.next() orelse return error.InvalidParams;`

			`if (encoding == .utf8) {`
			`const index = @intCast(i64, line_start_idx) + position.character;`
			`if (index < 0 or index > @intCast(i64, doc.text.len)) {`
			`return error.InvalidParams;`
			`}`
			`return DocumentPosition{ .line = line, .absolute_index = @intCast(usize, index), .line_index = @intCast(usize, position.character) };`
			`} else {`
			`const utf8 = doc.text[line_start_idx..];`
			`var utf8_idx: usize = 0;`
			`var utf16_idx: usize = 0;`
			`while (utf16_idx < position.character) {`
			`if (utf8_idx > utf8.len) {`
			`return error.InvalidParams;`
			`}`

			`const n = try std.unicode.utf8ByteSequenceLength(utf8[utf8_idx]);`
			`const next_utf8_idx = utf8_idx + n;`
			`const codepoint = try std.unicode.utf8Decode(utf8[utf8_idx..next_utf8_idx]);`
			`if (codepoint < 0x10000) {`
			`utf16_idx += 1;`
			`} else {`
			`utf16_idx += 2;`
			`}`
			`utf8_idx = next_utf8_idx;`
			`}`
			`return DocumentPosition{ .line = line, .absolute_index = line_start_idx + utf8_idx, .line_index = utf8_idx };`
			`}`
			`}`
Correctly handle utf16 offsets for semantic tokens 2020-07-03 09:34:42 +01:00
			`pub const TokenLocation = struct {`
			`line: usize,`
			`column: usize,`
Optimize document symbols 2020-07-07 09:57:02 +01:00			`offset: usize,`

			`pub fn add(lhs: TokenLocation, rhs: TokenLocation) TokenLocation {`
			`return .{`
			`.line = lhs.line + rhs.line,`
			`.column = if (rhs.line == 0)`
			`lhs.column + rhs.column`
			`else`
			`rhs.column,`
			`.offset = rhs.offset,`
			`};`
			`}`
Correctly handle utf16 offsets for semantic tokens 2020-07-03 09:34:42 +01:00			`};`

			`pub fn tokenRelativeLocation(tree: *std.zig.ast.Tree, start_index: usize, token: std.zig.ast.TokenIndex, encoding: Encoding) !TokenLocation {`
			`const token_loc = tree.token_locs[token];`

			`var loc = TokenLocation{`
			`.line = 0,`
			`.column = 0,`
Optimize document symbols 2020-07-07 09:57:02 +01:00			`.offset = 0,`
Correctly handle utf16 offsets for semantic tokens 2020-07-03 09:34:42 +01:00			`};`
			`const token_start = token_loc.start;`
			`const source = tree.source[start_index..];`
			`var i: usize = 0;`
			`while (i < token_start - start_index) {`
			`const c = source[i];`
			`if (c == '\n') {`
			`loc.line += 1;`
			`loc.column = 0;`
			`i += 1;`
			`} else {`
			`if (encoding == .utf16) {`
			`const n = try std.unicode.utf8ByteSequenceLength(c);`
zig fmt pass, correctly return after sending formatting result 2020-07-03 10:24:24 +01:00			`const codepoint = try std.unicode.utf8Decode(source[i .. i + n]);`
Correctly handle utf16 offsets for semantic tokens 2020-07-03 09:34:42 +01:00			`if (codepoint < 0x10000) {`
			`loc.column += 1;`
			`} else {`
			`loc.column += 2;`
			`}`
			`i += n;`
			`} else {`
			`loc.column += 1;`
			`i += 1;`
			`}`
			`}`
			`}`
Optimize document symbols 2020-07-07 09:57:02 +01:00			`loc.offset = i + start_index;`
Correctly handle utf16 offsets for semantic tokens 2020-07-03 09:34:42 +01:00			`return loc;`
			`}`

			`/// Asserts the token is comprised of valid utf8`
			`pub fn tokenLength(tree: *std.zig.ast.Tree, token: std.zig.ast.TokenIndex, encoding: Encoding) usize {`
			`const token_loc = tree.token_locs[token];`
			`if (encoding == .utf8)`
			`return token_loc.end - token_loc.start;`

			`var i: usize = token_loc.start;`
			`var utf16_len: usize = 0;`
			`while (i < token_loc.end) {`
			`const n = std.unicode.utf8ByteSequenceLength(tree.source[i]) catch unreachable;`
zig fmt pass, correctly return after sending formatting result 2020-07-03 10:24:24 +01:00			`const codepoint = std.unicode.utf8Decode(tree.source[i .. i + n]) catch unreachable;`
Correctly handle utf16 offsets for semantic tokens 2020-07-03 09:34:42 +01:00			`if (codepoint < 0x10000) {`
			`utf16_len += 1;`
			`} else {`
			`utf16_len += 2;`
			`}`
			`i += n;`
			`}`
			`return utf16_len;`
			`}`
Added encoding offset aware documentRange 2020-07-03 10:39:58 +01:00
			`pub fn documentRange(doc: types.TextDocument, encoding: Encoding) !types.Range {`
			`var line_idx: i64 = 0;`
			`var curr_line: []const u8 = doc.text;`

			`var split_iterator = std.mem.split(doc.text, "\n");`
			`while (split_iterator.next()) \|line\| : (line_idx += 1) {`
			`curr_line = line;`
			`}`

			`if (encoding == .utf8) {`
			`return types.Range{`
			`.start = .{`
			`.line = 0,`
			`.character = 0,`
			`},`
			`.end = .{`
			`.line = line_idx,`
			`.character = @intCast(i64, curr_line.len),`
			`},`
			`};`
			`} else {`
			`var utf16_len: usize = 0;`
			`var line_utf8_idx: usize = 0;`
			`while (line_utf8_idx < curr_line.len) {`
			`const n = try std.unicode.utf8ByteSequenceLength(curr_line[line_utf8_idx]);`
			`const codepoint = try std.unicode.utf8Decode(curr_line[line_utf8_idx .. line_utf8_idx + n]);`
			`if (codepoint < 0x10000) {`
			`utf16_len += 1;`
			`} else {`
			`utf16_len += 2;`
			`}`
			`line_utf8_idx += n;`
			`}`
			`return types.Range{`
			`.start = .{`
			`.line = 0,`
			`.character = 0,`
			`},`
			`.end = .{`
			`.line = line_idx,`
			`.character = @intCast(i64, utf16_len),`
			`},`
			`};`
			`}`
			`}`