zls/src/offsets.zig
Alexandros Naskos 06518778c1
Removed comment preprocessing code from the semantic tokens gap highlighter,
Fixed various comment semantic highlighting issues.
Refactored some minor parts of the analysis code.
2021-03-26 21:04:51 +02:00

738 lines
24 KiB
Zig

const std = @import("std");
const types = @import("types.zig");
const ast = std.zig.ast;
/// Unit in which character offsets and lengths are expressed by the
/// functions in this file.
pub const Encoding = enum {
/// Offsets and lengths are counted in bytes of the UTF-8 source text.
utf8,
/// Offsets and lengths are counted in UTF-16 code units: a code point
/// at or above 0x10000 counts as two units, any other as one.
utf16,
};
/// Result of `documentPosition`: a position resolved to byte offsets.
pub const DocumentPosition = struct {
/// Text of the line the position is on, as yielded by splitting the
/// document on "\n" (the newline itself is not included).
line: []const u8,
/// Byte offset of the position counted from the start of `line`.
line_index: usize,
/// Byte offset of the position counted from the start of the document text.
absolute_index: usize,
};
/// Resolves `position` (line + character) inside `doc` to byte offsets.
///
/// `position.character` is interpreted as a byte count when `encoding` is
/// `.utf8` and as a count of UTF-16 code units otherwise.
///
/// Returns `error.InvalidParams` when the position cannot be resolved:
/// the requested line does not exist, the character is negative, or the
/// character points past the end of the document text. In UTF-16 mode,
/// invalid UTF-8 met while walking the text is reported with the error
/// returned by `std.unicode`.
pub fn documentPosition(doc: types.TextDocument, position: types.Position, encoding: Encoding) !DocumentPosition {
    // A negative character can never be turned into an index; reject it up
    // front instead of tripping the `@intCast` to `usize` below.
    if (position.character < 0) return error.InvalidParams;

    var split_iterator = std.mem.split(doc.text, "\n");

    // Skip every line that precedes the requested one.
    var line_idx: i64 = 0;
    while (line_idx < position.line) : (line_idx += 1) {
        _ = split_iterator.next() orelse return error.InvalidParams;
    }

    // `index` is null once the iterator has been exhausted, which happens
    // when the requested line is exactly one past the last line. That is an
    // invalid position, not a programming error, so do not unwrap with `.?`.
    const line_start_idx = split_iterator.index orelse return error.InvalidParams;
    const line = split_iterator.next() orelse return error.InvalidParams;

    if (encoding == .utf8) {
        // Same bound as before (the absolute index may equal the text
        // length), expressed without a signed addition that could overflow.
        const bytes_after_line_start = doc.text.len - line_start_idx;
        if (position.character > bytes_after_line_start) {
            return error.InvalidParams;
        }
        const character = @intCast(usize, position.character);
        return DocumentPosition{
            .line = line,
            .absolute_index = line_start_idx + character,
            .line_index = character,
        };
    }

    // UTF-16: walk the UTF-8 text one code point at a time until the
    // requested number of UTF-16 code units has been consumed.
    const utf8 = doc.text[line_start_idx..];
    var utf8_idx: usize = 0;
    var utf16_idx: usize = 0;
    while (utf16_idx < position.character) {
        // `>=`: there must be at least one byte left to read.
        if (utf8_idx >= utf8.len) {
            return error.InvalidParams;
        }
        const n = try std.unicode.utf8ByteSequenceLength(utf8[utf8_idx]);
        const next_utf8_idx = utf8_idx + n;
        // The text ends in the middle of a multi-byte sequence.
        if (next_utf8_idx > utf8.len) {
            return error.InvalidParams;
        }
        const codepoint = try std.unicode.utf8Decode(utf8[utf8_idx..next_utf8_idx]);
        // Code points at or above 0x10000 take two UTF-16 code units.
        if (codepoint < 0x10000) {
            utf16_idx += 1;
        } else {
            utf16_idx += 2;
        }
        utf8_idx = next_utf8_idx;
    }
    return DocumentPosition{
        .line = line,
        .absolute_index = line_start_idx + utf8_idx,
        .line_index = utf8_idx,
    };
}
/// Returns the length of `tree.source[start_index..end_index]` in the unit
/// selected by `encoding`: bytes for `.utf8`, UTF-16 code units otherwise.
///
/// Asserts that `start_index <= end_index`, that the section lies inside the
/// source and, in UTF-16 mode, that it contains no newline.
///
/// Returns `error.CodepointTooLong` when a UTF-8 sequence would extend past
/// the end of the source, or the `std.unicode` error for invalid UTF-8.
pub fn lineSectionLength(tree: ast.Tree, start_index: usize, end_index: usize, encoding: Encoding) !usize {
    const source = tree.source[start_index..];
    std.debug.assert(end_index >= start_index and source.len >= end_index - start_index);
    if (encoding == .utf8) {
        return end_index - start_index;
    }

    var result: usize = 0;
    var i: usize = 0;
    while (i + start_index < end_index) {
        std.debug.assert(source[i] != '\n');
        const n = try std.unicode.utf8ByteSequenceLength(source[i]);
        // `>` rather than `>=`: a code point whose last byte is the last byte
        // of the source is complete and must be counted, not rejected.
        if (i + n > source.len)
            return error.CodepointTooLong;
        const codepoint = try std.unicode.utf8Decode(source[i .. i + n]);
        // One code unit, plus one more for code points at or above 0x10000.
        result += 1 + @as(usize, @boolToInt(codepoint >= 0x10000));
        i += n;
    }
    return result;
}
/// A line / column / offset triple describing where a token sits.
/// `tokenRelativeLocation` produces values of this type.
pub const TokenLocation = struct {
    line: usize,
    column: usize,
    offset: usize,

    /// Combines `lhs` with a location `rhs` that is relative to it.
    ///
    /// Lines are summed. When `rhs` stays on the same line (`rhs.line == 0`)
    /// the columns are summed as well; otherwise the column of `rhs` is used
    /// as is. The offset of the result is always the offset of `rhs`.
    pub fn add(lhs: TokenLocation, rhs: TokenLocation) TokenLocation {
        var combined = TokenLocation{
            .line = lhs.line + rhs.line,
            .column = rhs.column,
            .offset = rhs.offset,
        };
        const same_line = rhs.line == 0;
        if (same_line) {
            combined.column = lhs.column + rhs.column;
        }
        return combined;
    }
};
/// Computes where `token_start` lies relative to `start_index` in
/// `tree.source`.
///
/// The result holds the number of newlines crossed (`line`), the column
/// reached since the last newline crossed, or since `start_index` when none
/// was crossed (`column`), and the absolute source offset at which the scan
/// stopped (`offset`). Columns are counted in bytes for `.utf8` and in
/// UTF-16 code units for `.utf16`.
///
/// Asserts `token_start >= start_index`.
///
/// Returns `error.CodepointTooLong` when a UTF-8 sequence would extend past
/// the end of the source, or the `std.unicode` error for invalid UTF-8
/// (both only possible in `.utf16` mode).
pub fn tokenRelativeLocation(tree: ast.Tree, start_index: usize, token_start: usize, encoding: Encoding) !TokenLocation {
    std.debug.assert(token_start >= start_index);
    var loc = TokenLocation{
        .line = 0,
        .column = 0,
        .offset = 0,
    };

    const source = tree.source[start_index..];
    var i: usize = 0;
    while (i + start_index < token_start) {
        const c = source[i];
        if (c == '\n') {
            loc.line += 1;
            loc.column = 0;
            i += 1;
        } else if (encoding == .utf16) {
            const n = try std.unicode.utf8ByteSequenceLength(c);
            // `>` rather than `>=`: a code point whose last byte is the last
            // byte of the source is complete and must not be rejected.
            if (i + n > source.len)
                return error.CodepointTooLong;
            const codepoint = try std.unicode.utf8Decode(source[i .. i + n]);
            // One code unit, plus one more for code points at or above 0x10000.
            loc.column += 1 + @as(usize, @boolToInt(codepoint >= 0x10000));
            i += n;
        } else {
            loc.column += 1;
            i += 1;
        }
    }
    loc.offset = i + start_index;
    return loc;
}
/// Returns the length of `token` in the unit selected by `encoding`:
/// bytes for `.utf8`, UTF-16 code units otherwise.
/// Asserts the token is comprised of valid utf8
pub fn tokenLength(tree: ast.Tree, token: ast.TokenIndex, encoding: Encoding) usize {
    const span = tokenLocation(tree, token);
    if (encoding == .utf8)
        return span.end - span.start;

    var code_units: usize = 0;
    var pos: usize = span.start;
    while (pos < span.end) {
        const seq_len = std.unicode.utf8ByteSequenceLength(tree.source[pos]) catch unreachable;
        const codepoint = std.unicode.utf8Decode(tree.source[pos .. pos + seq_len]) catch unreachable;
        // Code points at or above 0x10000 occupy two UTF-16 code units.
        code_units += 1 + @as(usize, @boolToInt(codepoint >= 0x10000));
        pos += seq_len;
    }
    return code_units;
}
/// Token location inside source
/// `tokenLocation` fills both fields from the `loc` reported by the tokenizer.
pub const Loc = struct {
/// Start offset of the token in the source.
start: usize,
/// End offset of the token in the source.
end: usize,
};
/// Returns the start and end offsets of the token `token_index` in
/// `tree.source`.
///
/// The tree stores only where a token starts, so the source is tokenized
/// again from that offset to learn where the token ends. Asserts that the
/// re-tokenized token has the same tag as the one recorded in the tree.
pub fn tokenLocation(tree: ast.Tree, token_index: ast.TokenIndex) Loc {
    const token_start = tree.tokens.items(.start)[token_index];
    const expected_tag = tree.tokens.items(.tag)[token_index];

    // For some tokens, re-tokenization is needed to find the end.
    var tokenizer = std.zig.Tokenizer{
        .buffer = tree.source,
        .index = token_start,
        .pending_invalid_token = null,
    };
    const retokenized = tokenizer.next();
    std.debug.assert(retokenized.tag == expected_tag);

    return Loc{
        .start = retokenized.loc.start,
        .end = retokenized.loc.end,
    };
}
/// Returns the range covering the whole text of `doc`: from line 0,
/// character 0 up to the end of the last line.
///
/// The end character is the length of the last line, counted in bytes for
/// `.utf8` and in UTF-16 code units otherwise. Lines are separated by "\n".
///
/// Returns `error.CodepointTooLong` when the last line ends in the middle of
/// a UTF-8 sequence, or the `std.unicode` error for invalid UTF-8 (both only
/// possible in UTF-16 mode).
pub fn documentRange(doc: types.TextDocument, encoding: Encoding) !types.Range {
    var split_iterator = std.mem.split(doc.text, "\n");

    // Track the zero-based index of the last line. Counting every yielded
    // line would give the number of lines, which is one past the last valid
    // line index and would make the range end beyond the document.
    var last_line: []const u8 = split_iterator.next() orelse doc.text;
    var last_line_idx: i64 = 0;
    while (split_iterator.next()) |line| {
        last_line = line;
        last_line_idx += 1;
    }

    var end_character: usize = last_line.len;
    if (encoding != .utf8) {
        // Re-measure the last line in UTF-16 code units.
        end_character = 0;
        var i: usize = 0;
        while (i < last_line.len) {
            const n = try std.unicode.utf8ByteSequenceLength(last_line[i]);
            // Guard the slice below against a truncated multi-byte sequence.
            if (i + n > last_line.len)
                return error.CodepointTooLong;
            const codepoint = try std.unicode.utf8Decode(last_line[i .. i + n]);
            // One code unit, plus one more for code points at or above 0x10000.
            end_character += 1 + @as(usize, @boolToInt(codepoint >= 0x10000));
            i += n;
        }
    }

    return types.Range{
        .start = .{
            .line = 0,
            .character = 0,
        },
        .end = .{
            .line = last_line_idx,
            .character = @intCast(i64, end_character),
        },
    };
}
// Updated version from std that allows for failures
// by removing the unreachables and returning up to that point
// so that we can always provide information while the user is still typing
//
// Returns a token index computed for `node`: starting at `node`, the loop
// repeatedly descends into a child node chosen per node tag (mostly the
// right-most one) while adding to `end_offset` the number of tokens the
// inline comments attribute to closing punctuation (rparen, rbrace,
// rbracket, comma, semicolon). When a case cannot descend any further it
// returns a token index plus the accumulated `end_offset`.
//
// NOTE(review): several cases still call `std.debug.assert`, so an
// unexpected tree shape can still trap there - confirm this is intended.
pub fn lastToken(tree: ast.Tree, node: ast.Node.Index) ast.TokenIndex {
const Node = ast.Node;
const TokenIndex = ast.TokenIndex;
const tags = tree.nodes.items(.tag);
const datas = tree.nodes.items(.data);
const main_tokens = tree.nodes.items(.main_token);
const token_starts = tree.tokens.items(.start);
const token_tags = tree.tokens.items(.tag);
// Node currently being inspected; replaced by one of its children on
// every iteration that does not return.
var n = node;
// Number of tokens to add to the token index that is finally returned.
var end_offset: TokenIndex = 0;
while (true) switch (tags[n]) {
// The root node ends at the last token of the tree.
.root => return @intCast(TokenIndex, tree.tokens.len - 1),
// Tags for which the walk continues with the `lhs` child.
.@"usingnamespace",
.bool_not,
.negation,
.bit_not,
.negation_wrap,
.address_of,
.@"try",
.@"await",
.optional_type,
.@"resume",
.@"nosuspend",
.@"comptime",
=> n = datas[n].lhs,
// Tags for which the walk continues with the `rhs` child.
.test_decl,
.@"errdefer",
.@"defer",
.@"catch",
.equal_equal,
.bang_equal,
.less_than,
.greater_than,
.less_or_equal,
.greater_or_equal,
.assign_mul,
.assign_div,
.assign_mod,
.assign_add,
.assign_sub,
.assign_bit_shift_left,
.assign_bit_shift_right,
.assign_bit_and,
.assign_bit_xor,
.assign_bit_or,
.assign_mul_wrap,
.assign_add_wrap,
.assign_sub_wrap,
.assign,
.merge_error_sets,
.mul,
.div,
.mod,
.array_mult,
.mul_wrap,
.add,
.sub,
.array_cat,
.add_wrap,
.sub_wrap,
.bit_shift_left,
.bit_shift_right,
.bit_and,
.bit_xor,
.bit_or,
.@"orelse",
.bool_and,
.bool_or,
.anyframe_type,
.error_union,
.if_simple,
.while_simple,
.for_simple,
.fn_proto_simple,
.fn_proto_multi,
.ptr_type_aligned,
.ptr_type_sentinel,
.ptr_type,
.ptr_type_bit_range,
.array_type,
.switch_case_one,
.switch_case,
.switch_range,
=> n = datas[n].rhs,
// Tags whose `rhs` is used directly as a token index.
.field_access,
.unwrap_optional,
.grouped_expression,
.multiline_string_literal,
.error_set_decl,
.asm_simple,
.asm_output,
.asm_input,
.error_value,
=> return datas[n].rhs + end_offset,
// Tags that end at their own main token.
.@"anytype",
.anyframe_literal,
.char_literal,
.integer_literal,
.float_literal,
.false_literal,
.true_literal,
.null_literal,
.undefined_literal,
.unreachable_literal,
.identifier,
.deref,
.enum_literal,
.string_literal,
=> return main_tokens[n] + end_offset,
.@"return" => if (datas[n].lhs != 0) {
n = datas[n].lhs;
} else {
return main_tokens[n] + end_offset;
},
.call, .async_call => {
end_offset += 1; // for the rparen
const params = tree.extraData(datas[n].rhs, Node.SubRange);
if (params.end - params.start == 0) {
return main_tokens[n] + end_offset;
}
n = tree.extra_data[params.end - 1]; // last parameter
},
.tagged_union_enum_tag => {
const members = tree.extraData(datas[n].rhs, Node.SubRange);
if (members.end - members.start == 0) {
end_offset += 4; // for the rparen + rparen + lbrace + rbrace
n = datas[n].lhs;
} else {
end_offset += 1; // for the rbrace
n = tree.extra_data[members.end - 1]; // last parameter
}
},
.call_comma,
.async_call_comma,
.tagged_union_enum_tag_trailing,
=> {
end_offset += 2; // for the comma/semicolon + rparen/rbrace
const params = tree.extraData(datas[n].rhs, Node.SubRange);
std.debug.assert(params.end > params.start);
n = tree.extra_data[params.end - 1]; // last parameter
},
.@"switch" => {
const cases = tree.extraData(datas[n].rhs, Node.SubRange);
if (cases.end - cases.start == 0) {
end_offset += 3; // rparen, lbrace, rbrace
n = datas[n].lhs; // condition expression
} else {
end_offset += 1; // for the rbrace
n = tree.extra_data[cases.end - 1]; // last case
}
},
.container_decl_arg => {
const members = tree.extraData(datas[n].rhs, Node.SubRange);
if (members.end - members.start == 0) {
end_offset += 3; // for the rparen + lbrace + rbrace
n = datas[n].lhs;
} else {
end_offset += 1; // for the rbrace
n = tree.extra_data[members.end - 1]; // last parameter
}
},
.@"asm" => {
const extra = tree.extraData(datas[n].rhs, Node.Asm);
return extra.rparen + end_offset;
},
.array_init,
.struct_init,
=> {
const elements = tree.extraData(datas[n].rhs, Node.SubRange);
std.debug.assert(elements.end - elements.start > 0);
end_offset += 1; // for the rbrace
n = tree.extra_data[elements.end - 1]; // last element
},
.array_init_comma,
.struct_init_comma,
.container_decl_arg_trailing,
.switch_comma,
=> {
const members = tree.extraData(datas[n].rhs, Node.SubRange);
std.debug.assert(members.end - members.start > 0);
end_offset += 2; // for the comma + rbrace
n = tree.extra_data[members.end - 1]; // last parameter
},
// For the next two groups `lhs..rhs` is itself a range into
// `tree.extra_data`; the walk continues with its last entry.
.array_init_dot,
.struct_init_dot,
.block,
.container_decl,
.tagged_union,
.builtin_call,
=> {
std.debug.assert(datas[n].rhs - datas[n].lhs > 0);
end_offset += 1; // for the rbrace
n = tree.extra_data[datas[n].rhs - 1]; // last statement
},
.array_init_dot_comma,
.struct_init_dot_comma,
.block_semicolon,
.container_decl_trailing,
.tagged_union_trailing,
.builtin_call_comma,
=> {
std.debug.assert(datas[n].rhs - datas[n].lhs > 0);
end_offset += 2; // for the comma/semicolon + rbrace/rparen
n = tree.extra_data[datas[n].rhs - 1]; // last member
},
.call_one,
.async_call_one,
.array_access,
=> {
end_offset += 1; // for the rparen/rbracket
if (datas[n].rhs == 0) {
return main_tokens[n] + end_offset;
}
n = datas[n].rhs;
},
// "_two" variants: `rhs` is preferred, then `lhs`; when both are 0
// the end is computed from the main token alone.
.array_init_dot_two,
.block_two,
.builtin_call_two,
.struct_init_dot_two,
.container_decl_two,
.tagged_union_two,
=> {
if (datas[n].rhs != 0) {
end_offset += 1; // for the rparen/rbrace
n = datas[n].rhs;
} else if (datas[n].lhs != 0) {
end_offset += 1; // for the rparen/rbrace
n = datas[n].lhs;
} else {
switch (tags[n]) {
.array_init_dot_two,
.block_two,
.struct_init_dot_two,
=> end_offset += 1, // rbrace
.builtin_call_two => end_offset += 2, // lparen/lbrace + rparen/rbrace
.container_decl_two => {
var i: u32 = 2; // lbrace + rbrace
// Step over any container doc comment tokens found at that position.
while (token_tags[main_tokens[n] + i] == .container_doc_comment) i += 1;
end_offset += i;
},
.tagged_union_two => {
var i: u32 = 5; // (enum) {}
// Step over any container doc comment tokens found at that position.
while (token_tags[main_tokens[n] + i] == .container_doc_comment) i += 1;
end_offset += i;
},
else => unreachable,
}
return main_tokens[n] + end_offset;
}
},
.array_init_dot_two_comma,
.builtin_call_two_comma,
.block_two_semicolon,
.struct_init_dot_two_comma,
.container_decl_two_trailing,
.tagged_union_two_trailing,
=> {
end_offset += 2; // for the comma/semicolon + rbrace/rparen
if (datas[n].rhs != 0) {
n = datas[n].rhs;
} else if (datas[n].lhs != 0) {
n = datas[n].lhs;
} else {
return main_tokens[n] + end_offset; // returns { }
}
},
.simple_var_decl => {
if (datas[n].rhs != 0) {
n = datas[n].rhs;
} else if (datas[n].lhs != 0) {
n = datas[n].lhs;
} else {
end_offset += 1; // from mut token to name
return main_tokens[n] + end_offset;
}
},
.aligned_var_decl => {
if (datas[n].rhs != 0) {
n = datas[n].rhs;
} else if (datas[n].lhs != 0) {
end_offset += 1; // for the rparen
n = datas[n].lhs;
} else {
end_offset += 1; // from mut token to name
return main_tokens[n] + end_offset;
}
},
.global_var_decl => {
if (datas[n].rhs != 0) {
n = datas[n].rhs;
} else {
const extra = tree.extraData(datas[n].lhs, Node.GlobalVarDecl);
if (extra.section_node != 0) {
end_offset += 1; // for the rparen
n = extra.section_node;
} else if (extra.align_node != 0) {
end_offset += 1; // for the rparen
n = extra.align_node;
} else if (extra.type_node != 0) {
n = extra.type_node;
} else {
end_offset += 1; // from mut token to name
return main_tokens[n] + end_offset;
}
}
},
.local_var_decl => {
if (datas[n].rhs != 0) {
n = datas[n].rhs;
} else {
const extra = tree.extraData(datas[n].lhs, Node.LocalVarDecl);
if (extra.align_node != 0) {
end_offset += 1; // for the rparen
n = extra.align_node;
} else if (extra.type_node != 0) {
n = extra.type_node;
} else {
end_offset += 1; // from mut token to name
return main_tokens[n] + end_offset;
}
}
},
.container_field_init => {
if (datas[n].rhs != 0) {
n = datas[n].rhs;
} else if (datas[n].lhs != 0) {
n = datas[n].lhs;
} else {
return main_tokens[n] + end_offset;
}
},
.container_field_align => {
if (datas[n].rhs != 0) {
end_offset += 1; // for the rparen
n = datas[n].rhs;
} else if (datas[n].lhs != 0) {
n = datas[n].lhs;
} else {
return main_tokens[n] + end_offset;
}
},
.container_field => {
const extra = tree.extraData(datas[n].rhs, Node.ContainerField);
if (extra.value_expr != 0) {
n = extra.value_expr;
} else if (extra.align_expr != 0) {
end_offset += 1; // for the rparen
n = extra.align_expr;
} else if (datas[n].lhs != 0) {
n = datas[n].lhs;
} else {
return main_tokens[n] + end_offset;
}
},
.array_init_one,
.struct_init_one,
=> {
end_offset += 1; // rbrace
if (datas[n].rhs == 0) {
return main_tokens[n] + end_offset;
} else {
n = datas[n].rhs;
}
},
.slice_open,
.call_one_comma,
.async_call_one_comma,
.array_init_one_comma,
.struct_init_one_comma,
=> {
end_offset += 2; // ellipsis2 + rbracket, or comma + rparen
n = datas[n].rhs;
std.debug.assert(n != 0);
},
.slice => {
const extra = tree.extraData(datas[n].rhs, Node.Slice);
std.debug.assert(extra.end != 0); // should have used slice_open
end_offset += 1; // rbracket
n = extra.end;
},
.slice_sentinel => {
const extra = tree.extraData(datas[n].rhs, Node.SliceSentinel);
std.debug.assert(extra.sentinel != 0); // should have used slice
end_offset += 1; // rbracket
n = extra.sentinel;
},
// For `continue` and `break`, `lhs` is returned as a token index
// rather than followed as a node.
.@"continue" => {
if (datas[n].lhs != 0) {
return datas[n].lhs + end_offset;
} else {
return main_tokens[n] + end_offset;
}
},
.@"break" => {
if (datas[n].rhs != 0) {
n = datas[n].rhs;
} else if (datas[n].lhs != 0) {
return datas[n].lhs + end_offset;
} else {
return main_tokens[n] + end_offset;
}
},
.fn_decl => {
if (datas[n].rhs != 0) {
n = datas[n].rhs;
} else {
n = datas[n].lhs;
}
},
.fn_proto_one => {
const extra = tree.extraData(datas[n].lhs, Node.FnProtoOne);
// linksection, callconv, align can appear in any order, so we
// find the last one here.
// The candidate starts as `rhs` and is replaced by whichever
// expression's main token starts later in the source.
var max_node: Node.Index = datas[n].rhs;
var max_start = token_starts[main_tokens[max_node]];
var max_offset: TokenIndex = 0;
if (extra.align_expr != 0) {
const start = token_starts[main_tokens[extra.align_expr]];
if (start > max_start) {
max_node = extra.align_expr;
max_start = start;
max_offset = 1; // for the rparen
}
}
if (extra.section_expr != 0) {
const start = token_starts[main_tokens[extra.section_expr]];
if (start > max_start) {
max_node = extra.section_expr;
max_start = start;
max_offset = 1; // for the rparen
}
}
if (extra.callconv_expr != 0) {
const start = token_starts[main_tokens[extra.callconv_expr]];
if (start > max_start) {
max_node = extra.callconv_expr;
max_start = start;
max_offset = 1; // for the rparen
}
}
n = max_node;
end_offset += max_offset;
},
.fn_proto => {
const extra = tree.extraData(datas[n].lhs, Node.FnProto);
// linksection, callconv, align can appear in any order, so we
// find the last one here.
// Same selection as in the `.fn_proto_one` case above.
var max_node: Node.Index = datas[n].rhs;
var max_start = token_starts[main_tokens[max_node]];
var max_offset: TokenIndex = 0;
if (extra.align_expr != 0) {
const start = token_starts[main_tokens[extra.align_expr]];
if (start > max_start) {
max_node = extra.align_expr;
max_start = start;
max_offset = 1; // for the rparen
}
}
if (extra.section_expr != 0) {
const start = token_starts[main_tokens[extra.section_expr]];
if (start > max_start) {
max_node = extra.section_expr;
max_start = start;
max_offset = 1; // for the rparen
}
}
if (extra.callconv_expr != 0) {
const start = token_starts[main_tokens[extra.callconv_expr]];
if (start > max_start) {
max_node = extra.callconv_expr;
max_start = start;
max_offset = 1; // for the rparen
}
}
n = max_node;
end_offset += max_offset;
},
.while_cont => {
const extra = tree.extraData(datas[n].rhs, Node.WhileCont);
std.debug.assert(extra.then_expr != 0);
n = extra.then_expr;
},
.@"while" => {
const extra = tree.extraData(datas[n].rhs, Node.While);
std.debug.assert(extra.else_expr != 0);
n = extra.else_expr;
},
.@"if", .@"for" => {
const extra = tree.extraData(datas[n].rhs, Node.If);
std.debug.assert(extra.else_expr != 0);
n = extra.else_expr;
},
.@"suspend" => {
if (datas[n].lhs != 0) {
n = datas[n].lhs;
} else {
return main_tokens[n] + end_offset;
}
},
.array_type_sentinel => {
const extra = tree.extraData(datas[n].rhs, Node.ArrayTypeSentinel);
n = extra.elem_type;
},
};
}