Removed comment preprocessing code from the semantic tokens gap highlighter.

Fixed various comment semantic highlighting issues.
Refactored some minor parts of the analysis code.
This commit is contained in:
Alexandros Naskos 2021-03-26 21:04:51 +02:00
parent 2a57789be8
commit 06518778c1
No known key found for this signature in database
GPG Key ID: 02BF2E72B0EA32D2
5 changed files with 156 additions and 125 deletions

View File

@ -228,7 +228,6 @@ pub fn getDeclNameToken(tree: ast.Tree, node: ast.Node.Index) ?ast.TokenIndex {
.global_var_decl => tree.globalVarDecl(node).ast.mut_token + 1,
.simple_var_decl => tree.simpleVarDecl(node).ast.mut_token + 1,
.aligned_var_decl => tree.alignedVarDecl(node).ast.mut_token + 1,
// function declaration names
.fn_proto,
.fn_proto_multi,
@ -324,8 +323,8 @@ fn resolveVarDeclAliasInternal(
.other => |n| n,
else => return null,
};
if (resolved_node >= node_tags.len or !isContainer(node_tags[resolved_node])) return null;
const resolved_tree_tags = resolved.handle.tree.nodes.items(.tag);
if (!isContainer(resolved.handle.tree, resolved_node)) return null;
break :block NodeWithHandle{ .node = resolved_node, .handle = resolved.handle };
} else return null;
@ -1117,9 +1116,9 @@ pub const TypeWithHandle = struct {
const tree = self.handle.tree;
const node = self.type.data.other;
const tags = tree.nodes.items(.tag);
if (isContainer(tags[node])) {
if (isContainer(tree, node)) {
var buf: [2]ast.Node.Index = undefined;
for (declMembers(tree, tags[node], node, &buf)) |child| {
for (declMembers(tree, node, &buf)) |child| {
if (tags[child].isContainerField()) return false;
}
}
@ -1478,8 +1477,8 @@ pub fn fnProto(tree: ast.Tree, node: ast.Node.Index, buf: *[1]ast.Node.Index) ?a
pub fn getImportStr(tree: ast.Tree, node: ast.Node.Index, source_index: usize) ?[]const u8 {
const node_tags = tree.nodes.items(.tag);
var buf: [2]ast.Node.Index = undefined;
if (isContainer(node_tags[node])) {
const decls = declMembers(tree, node_tags[node], node, &buf);
if (isContainer(tree, node)) {
const decls = declMembers(tree, node, &buf);
for (decls) |decl_idx| {
if (getImportStr(tree, decl_idx, source_index)) |name| {
return name;
@ -1806,7 +1805,7 @@ fn addOutlineNodes(allocator: *std.mem.Allocator, tree: ast.Tree, child: ast.Nod
.tagged_union_two_trailing,
=> {
var buf: [2]ast.Node.Index = undefined;
for (declMembers(tree, tree.nodes.items(.tag)[child], child, &buf)) |member|
for (declMembers(tree, child, &buf)) |member|
try addOutlineNodes(allocator, tree, member, context);
return;
},
@ -1894,9 +1893,9 @@ fn getDocumentSymbolsInternal(allocator: *std.mem.Allocator, tree: ast.Tree, nod
.encoding = context.encoding,
};
if (isContainer(tags[node])) {
if (isContainer(tree, node)) {
var buf: [2]ast.Node.Index = undefined;
for (declMembers(tree, tags[node], node, &buf)) |child|
for (declMembers(tree, node, &buf)) |child|
try addOutlineNodes(allocator, tree, child, &child_context);
}
@ -2091,7 +2090,7 @@ fn findContainerScope(container_handle: NodeWithHandle) ?*Scope {
const container = container_handle.node;
const handle = container_handle.handle;
if (!isContainer(handle.tree.nodes.items(.tag)[container])) return null;
if (!isContainer(handle.tree, container)) return null;
// Find the container scope.
return for (handle.document_scope.scopes) |*scope| {
@ -2534,8 +2533,8 @@ fn nodeSourceRange(tree: ast.Tree, node: ast.Node.Index) SourceRange {
};
}
pub fn isContainer(tag: ast.Node.Tag) bool {
return switch (tag) {
pub fn isContainer(tree: ast.Tree, node: ast.Node.Index) bool {
return switch (tree.nodes.items(.tag)[node]) {
.container_decl,
.container_decl_trailing,
.container_decl_arg,
@ -2557,9 +2556,9 @@ pub fn isContainer(tag: ast.Node.Tag) bool {
/// Returns the member indices of a given declaration container.
/// Asserts the given `node_idx` refers to a container node
pub fn declMembers(tree: ast.Tree, tag: ast.Node.Tag, node_idx: ast.Node.Index, buffer: *[2]ast.Node.Index) []const ast.Node.Index {
std.debug.assert(isContainer(tag));
return switch (tag) {
pub fn declMembers(tree: ast.Tree, node_idx: ast.Node.Index, buffer: *[2]ast.Node.Index) []const ast.Node.Index {
std.debug.assert(isContainer(tree, node_idx));
return switch (tree.nodes.items(.tag)[node_idx]) {
.container_decl, .container_decl_trailing => tree.containerDecl(node_idx).ast.members,
.container_decl_arg, .container_decl_arg_trailing => tree.containerDeclArg(node_idx).ast.members,
.container_decl_two, .container_decl_two_trailing => tree.containerDeclTwo(buffer, node_idx).ast.members,
@ -2598,11 +2597,11 @@ fn makeScopeInternal(
const token_tags = tree.tokens.items(.tag);
const data = tree.nodes.items(.data);
const main_tokens = tree.nodes.items(.main_token);
const node = tags[node_idx];
const node_tag = tags[node_idx];
if (isContainer(node)) {
if (isContainer(tree, node_idx)) {
var buf: [2]ast.Node.Index = undefined;
const ast_decls = declMembers(tree, node, node_idx, &buf);
const ast_decls = declMembers(tree,node_idx, &buf);
(try scopes.addOne(allocator)).* = .{
.range = nodeSourceRange(tree, node_idx),
@ -2635,7 +2634,7 @@ fn makeScopeInternal(
continue;
}
if (node == .error_set_decl) {
if (node_tag == .error_set_decl) {
(try error_completions.addOne(allocator)).* = .{
.label = name,
.kind = .Constant,
@ -2655,11 +2654,11 @@ fn makeScopeInternal(
if (container_field) |field| {
const empty_field = field.ast.type_expr == 0 and field.ast.value_expr == 0;
if (empty_field and node == .root) {
if (empty_field and node_tag == .root) {
continue;
}
const container_decl: ?ast.full.ContainerDecl = switch (node) {
const container_decl: ?ast.full.ContainerDecl = switch (node_tag) {
.container_decl, .container_decl_trailing => tree.containerDecl(node_idx),
.container_decl_arg, .container_decl_arg_trailing => tree.containerDeclArg(node_idx),
.container_decl_two, .container_decl_two_trailing => blk: {
@ -2704,7 +2703,7 @@ fn makeScopeInternal(
return;
}
switch (node) {
switch (node_tag) {
.fn_proto,
.fn_proto_one,
.fn_proto_simple,
@ -2782,7 +2781,7 @@ fn makeScopeInternal(
uses.deinit();
}
const statements: []const ast.Node.Index = switch (node) {
const statements: []const ast.Node.Index = switch (node_tag) {
.block, .block_semicolon => tree.extra_data[data[node_idx].lhs..data[node_idx].rhs],
.block_two, .block_two_semicolon => blk: {
const statements = &[_]ast.Node.Index{ data[node_idx].lhs, data[node_idx].rhs };
@ -2819,7 +2818,7 @@ fn makeScopeInternal(
.@"if",
.if_simple,
=> {
const if_node: ast.full.If = if (node == .@"if")
const if_node: ast.full.If = if (node_tag == .@"if")
tree.ifFull(node_idx)
else
tree.ifSimple(node_idx);
@ -2879,8 +2878,8 @@ fn makeScopeInternal(
.while_cont,
.@"for",
.for_simple,
=> |tag| {
const while_node: ast.full.While = switch (node) {
=> {
const while_node: ast.full.While = switch (node_tag) {
.@"while" => tree.whileFull(node_idx),
.while_simple => tree.whileSimple(node_idx),
.while_cont => tree.whileCont(node_idx),
@ -2889,7 +2888,7 @@ fn makeScopeInternal(
else => unreachable,
};
const is_for = tag == .@"for" or tag == .for_simple;
const is_for = node_tag == .@"for" or node_tag == .for_simple;
if (while_node.label_token) |label| {
std.debug.assert(token_tags[label] == .identifier);
@ -3043,7 +3042,7 @@ fn makeScopeInternal(
.async_call_one_comma,
=> {
var buf: [1]ast.Node.Index = undefined;
const call: ast.full.Call = switch (node) {
const call: ast.full.Call = switch (node_tag) {
.async_call,
.async_call_comma,
.call,
@ -3071,7 +3070,7 @@ fn makeScopeInternal(
.struct_init_one_comma,
=> {
var buf: [2]ast.Node.Index = undefined;
const struct_init: ast.full.StructInit = switch (node) {
const struct_init: ast.full.StructInit = switch (node_tag) {
.struct_init, .struct_init_comma => tree.structInit(node_idx),
.struct_init_dot, .struct_init_dot_comma => tree.structInitDot(node_idx),
.struct_init_dot_two, .struct_init_dot_two_comma => tree.structInitDotTwo(&buf, node_idx),
@ -3096,7 +3095,7 @@ fn makeScopeInternal(
.array_init_one_comma,
=> {
var buf: [2]ast.Node.Index = undefined;
const array_init: ast.full.ArrayInit = switch (node) {
const array_init: ast.full.ArrayInit = switch (node_tag) {
.array_init, .array_init_comma => tree.arrayInit(node_idx),
.array_init_dot, .array_init_dot_comma => tree.arrayInitDot(node_idx),
.array_init_dot_two, .array_init_dot_two_comma => tree.arrayInitDotTwo(&buf, node_idx),
@ -3129,7 +3128,7 @@ fn makeScopeInternal(
.builtin_call_two_comma,
=> {
const b_data = data[node_idx];
const params = switch (node) {
const params = switch (node_tag) {
.builtin_call, .builtin_call_comma => tree.extra_data[b_data.lhs..b_data.rhs],
.builtin_call_two, .builtin_call_two_comma => if (b_data.lhs == 0)
&[_]ast.Node.Index{}
@ -3161,7 +3160,7 @@ fn makeScopeInternal(
.slice_open,
.slice_sentinel,
=> {
const slice: ast.full.Slice = switch (node) {
const slice: ast.full.Slice = switch (node_tag) {
.slice => tree.slice(node_idx),
.slice_open => tree.sliceOpen(node_idx),
.slice_sentinel => tree.sliceSentinel(node_idx),

View File

@ -359,7 +359,7 @@ fn nodeToCompletion(
else
null;
if (analysis.isContainer(node_tags[node])) {
if (analysis.isContainer(handle.tree, node)) {
const context = DeclToCompletionContext{
.completions = list,
.config = &config,

View File

@ -54,6 +54,30 @@ pub fn documentPosition(doc: types.TextDocument, position: types.Position, encod
}
}
/// Returns the length of the source range [start_index, end_index) in code
/// units of `encoding`: bytes for UTF-8, u16 code units for UTF-16 (one unit
/// for BMP codepoints, two for codepoints >= 0x10000, i.e. surrogate pairs).
/// Asserts the range is well-formed and lies inside the tree's source, and
/// that it contains no newline.
/// Returns `error.CodepointTooLong` if a UTF-8 sequence would run past the
/// end of the source; decode errors from `std.unicode` are propagated.
pub fn lineSectionLength(tree: ast.Tree, start_index: usize, end_index: usize, encoding: Encoding) !usize {
    const source = tree.source[start_index..];
    std.debug.assert(end_index >= start_index and source.len >= end_index - start_index);
    // For UTF-8 the section length is simply its byte count.
    if (encoding == .utf8) {
        return end_index - start_index;
    }
    var result: usize = 0;
    var i: usize = 0;
    while (i + start_index < end_index) {
        std.debug.assert(source[i] != '\n');
        const n = try std.unicode.utf8ByteSequenceLength(source[i]);
        // A sequence ending exactly at source.len is still fully contained
        // (slice bounds are exclusive); only reject sequences that would
        // run past the end of the source.
        if (i + n > source.len)
            return error.CodepointTooLong;
        const codepoint = try std.unicode.utf8Decode(source[i .. i + n]);
        result += 1 + @as(usize, @boolToInt(codepoint >= 0x10000));
        i += n;
    }
    return result;
}
pub const TokenLocation = struct {
line: usize,
column: usize,
@ -71,15 +95,14 @@ pub const TokenLocation = struct {
}
};
pub fn tokenRelativeLocation(tree: ast.Tree, start_index: usize, next_token_index: usize, encoding: Encoding) !TokenLocation {
const start = next_token_index;
pub fn tokenRelativeLocation(tree: ast.Tree, start_index: usize, token_start: usize, encoding: Encoding) !TokenLocation {
std.debug.assert(token_start >= start_index);
var loc = TokenLocation{
.line = 0,
.column = 0,
.offset = 0,
};
const token_start = start;
const source = tree.source[start_index..];
var i: usize = 0;
while (i + start_index < token_start) {
@ -91,12 +114,11 @@ pub fn tokenRelativeLocation(tree: ast.Tree, start_index: usize, next_token_inde
} else {
if (encoding == .utf16) {
const n = try std.unicode.utf8ByteSequenceLength(c);
if (i + n >= source.len)
return error.CodepointTooLong;
const codepoint = try std.unicode.utf8Decode(source[i .. i + n]);
if (codepoint < 0x10000) {
loc.column += 1;
} else {
loc.column += 2;
}
loc.column += 1 + @as(usize, @boolToInt(codepoint >= 0x10000));
i += n;
} else {
loc.column += 1;

View File

@ -119,7 +119,7 @@ fn symbolReferencesInternal(
.error_set_decl,
=> {
var buf: [2]ast.Node.Index = undefined;
for (analysis.declMembers(tree, node_tags[node], node, &buf)) |member|
for (analysis.declMembers(tree, node, &buf)) |member|
try symbolReferencesInternal(arena, store, .{ .node = member, .handle = handle }, decl, encoding, context, handler);
},
.global_var_decl,

View File

@ -75,6 +75,36 @@ const Builder = struct {
};
}
/// Appends one five-element semantic token record
/// (line delta, column delta, length, token type, modifiers)
/// for the comment spanning [comment_start, comment_end).
/// The emitted position is relative to `prev_end`, the end of the
/// previously emitted token.
/// Errors from the offset helpers are swallowed (`catch return`),
/// so a malformed range silently emits nothing.
fn highlightComment(
self: *Builder,
prev_end: usize,
comment_start: usize,
comment_end: usize,
token_modifiers: TokenModifiers,
) !void {
// Line/column delta from the previous token's end to the comment start.
const comment_delta = offsets.tokenRelativeLocation(
self.handle.tree,
prev_end,
comment_start,
self.encoding,
) catch return;
// Comment length measured in code units of the negotiated encoding.
const comment_length = offsets.lineSectionLength(
self.handle.tree,
comment_start,
comment_end,
self.encoding,
) catch return;
try self.arr.appendSlice(&.{
@truncate(u32, comment_delta.line),
@truncate(u32, comment_delta.column),
@truncate(u32, comment_length),
@enumToInt(TokenType.comment),
token_modifiers.toInt(),
});
}
fn add(self: *Builder, token: ast.TokenIndex, token_type: TokenType, token_modifiers: TokenModifiers) !void {
const starts = self.handle.tree.tokens.items(.start);
var start_idx = if (self.current_token) |current_token|
@ -85,24 +115,67 @@ const Builder = struct {
if (start_idx > starts[token])
return;
const delta_loc = while (self.findCommentBetween(start_idx, starts[token])) |comment| {
const old_loc = self.handle.tree.tokenLocation(0, self.current_token orelse 0);
const comment_delta = offsets.tokenRelativeLocation(self.handle.tree, start_idx, comment.start, self.encoding) catch return;
var comments_end: usize = start_idx;
var comments_start: usize = start_idx;
// Highlight comments in the gap
{
const source = self.handle.tree.source;
var state: enum { none, comment, doc_comment, comment_start } = .none;
var prev_byte = source[start_idx];
var i: usize = start_idx + 1;
while (i < starts[token]) : ({
prev_byte = source[i];
i += 1;
}) {
if (prev_byte == '/' and source[i] == '/') {
switch (state) {
.none => {
comments_start = i - 1;
state = .comment_start;
},
.comment_start => state = .doc_comment,
else => {},
}
continue;
} else if (prev_byte == '/' and source[i] == '!' and state == .comment_start) {
state = .doc_comment;
continue;
}
try self.arr.appendSlice(&[_]u32{
@truncate(u32, comment_delta.line),
@truncate(u32, comment_delta.column),
comment.length,
@enumToInt(TokenType.comment),
0,
});
if (source[i] == '\n' and state != .none) {
try self.highlightComment(comments_end, comments_start, i, switch (state) {
.comment, .comment_start => .{},
.doc_comment => .{ .documentation = true },
else => unreachable,
});
comments_end = i;
state = .none;
} else if (state == .comment_start) {
state = .comment;
}
}
if (state != .none) {
try self.highlightComment(comments_end, comments_start, i, switch (state) {
.comment, .comment_start => .{},
.doc_comment => .{ .documentation = true },
else => unreachable,
});
// @@@
// comments_end = i;
}
}
start_idx = comment.start;
} else offsets.tokenRelativeLocation(self.handle.tree, start_idx, starts[token], self.encoding) catch return;
std.debug.print("DELTA:\n```\n{s}\n```\n(LEN: {})\n", .{self.handle.tree.source[comments_end..starts[token]], starts[token] - comments_end});
const delta = offsets.tokenRelativeLocation(
self.handle.tree,
comments_start,
starts[token],
self.encoding,
) catch return;
try self.arr.appendSlice(&[_]u32{
@truncate(u32, delta_loc.line),
@truncate(u32, delta_loc.column),
try self.arr.appendSlice(&.{
@truncate(u32, delta.line),
@truncate(u32, delta.column),
@truncate(u32, offsets.tokenLength(self.handle.tree, token, self.encoding)),
@enumToInt(token_type),
token_modifiers.toInt(),
@ -113,15 +186,6 @@ const Builder = struct {
fn toOwnedSlice(self: *Builder) []u32 {
return self.arr.toOwnedSlice();
}
/// Based on a given start and end index, returns a `Comment` between the positions
/// Returns `null` if none was found
fn findCommentBetween(self: Builder, from: u32, to: u32) ?Comment {
// Linear scan over the collected comments; yields the first comment
// whose start lies strictly inside (from, to).
return for (self.comments.items) |comment| {
if (comment.start > from and comment.start < to)
break comment;
} else null;
}
};
fn writeToken(
@ -710,7 +774,7 @@ fn writeNodeTokens(
.node = struct_init.ast.type_expr,
.handle = handle,
})) |struct_type| switch (struct_type.type.data) {
.other => |type_node| if (analysis.isContainer(struct_type.handle.tree.nodes.items(.tag)[type_node]))
.other => |type_node| if (analysis.isContainer(struct_type.handle.tree, type_node))
fieldTokenType(type_node, struct_type.handle)
else
null,
@ -976,7 +1040,7 @@ fn writeNodeTokens(
switch (decl_type.decl.*) {
.ast_node => |decl_node| {
if (decl_type.handle.tree.nodes.items(.tag)[decl_node].isContainerField()) {
const tok_type: ?TokenType = if (analysis.isContainer(lhs_type.handle.tree.nodes.items(.tag)[left_type_node]))
const tok_type: ?TokenType = if (analysis.isContainer(lhs_type.handle.tree, left_type_node))
fieldTokenType(decl_node, lhs_type.handle)
else if (left_type_node == 0)
TokenType.field
@ -1083,14 +1147,11 @@ fn writeNodeTokens(
pub fn writeAllSemanticTokens(arena: *std.heap.ArenaAllocator, store: *DocumentStore, handle: *DocumentStore.Handle, encoding: offsets.Encoding) ![]u32 {
var builder = Builder.init(arena.child_allocator, handle, encoding);
// as line comments are not nodes, we parse the text then generate the tokens for them
try findComments(&builder, handle.tree.source, encoding);
// reverse the ast from the root declarations
var gap_highlighter = GapHighlighter.init(&builder, 0);
var buf: [2]ast.Node.Index = undefined;
for (analysis.declMembers(handle.tree, .root, 0, &buf)) |child| {
for (analysis.declMembers(handle.tree, 0, &buf)) |child| {
try gap_highlighter.next(child);
try writeNodeTokens(&builder, arena, store, child);
}
@ -1099,54 +1160,3 @@ pub fn writeAllSemanticTokens(arena: *std.heap.ArenaAllocator, store: *DocumentS
return builder.toOwnedSlice();
}
/// As the AST does not contain nodes for comments
/// this will parse through the entire file to search for comments
/// and generate semantic tokens for them
fn findComments(builder: *Builder, source: []const u8, encoding: offsets.Encoding) !void {
// NOTE(review): the .doc_comment state is declared but never assigned below.
var state: enum { none, comment, doc_comment } = .none;
var prev: u8 = 0;
var start: usize = 0;
for (source) |c, i| {
// NOTE(review): ANY '/' seen while already inside a comment resets the
// state, so a comment whose body contains '/' (e.g. a URL) is never
// emitted at the newline check below — confirm whether this is intended
// (it also drops the third '/' of "///" doc comments).
if (state == .comment and c == '/') {
state = .none;
continue;
}
// "//" begins a line comment; remember the index of the first '/'.
if (state == .none and c == '/' and prev == '/') {
state = .comment;
start = i - 1;
}
if (c == '\n') {
if (state == .comment) {
state = .none;
// Comment length in code units of the client encoding:
// bytes for UTF-8, u16 code units for UTF-16.
const len = if (encoding == .utf8)
i - start
else blk: {
var index: usize = start;
var utf16_len: usize = 0;
while (index < i) {
// NOTE(review): `catch unreachable` assumes the source is valid
// UTF-8 — undefined behavior in release builds if it is not.
const n = std.unicode.utf8ByteSequenceLength(source[index]) catch unreachable;
const codepoint = std.unicode.utf8Decode(source[index .. index + n]) catch unreachable;
if (codepoint < 0x10000) {
utf16_len += 1;
} else {
// Codepoints outside the BMP take a surrogate pair (2 units).
utf16_len += 2;
}
index += n;
}
break :blk utf16_len;
};
try builder.comments.append(.{
.length = @truncate(u32, len),
.start = @truncate(u32, start),
});
}
}
prev = c;
}
}