From 06518778c1bb12891628fab10f79d97b7fbd4e96 Mon Sep 17 00:00:00 2001 From: Alexandros Naskos Date: Fri, 26 Mar 2021 21:04:51 +0200 Subject: [PATCH] Removed comment preprocessing code from the semantic tokens gap highlighter, fixed various comment semantic highlighting issues. Refactored some minor parts of the analysis code. --- src/analysis.zig | 65 ++++++++------- src/main.zig | 2 +- src/offsets.zig | 40 +++++++--- src/references.zig | 2 +- src/semantic_tokens.zig | 172 +++++++++++++++++++++------------------- 5 files changed, 156 insertions(+), 125 deletions(-) diff --git a/src/analysis.zig b/src/analysis.zig index 3f8a2d0..17df144 100644 --- a/src/analysis.zig +++ b/src/analysis.zig @@ -228,7 +228,6 @@ pub fn getDeclNameToken(tree: ast.Tree, node: ast.Node.Index) ?ast.TokenIndex { .global_var_decl => tree.globalVarDecl(node).ast.mut_token + 1, .simple_var_decl => tree.simpleVarDecl(node).ast.mut_token + 1, .aligned_var_decl => tree.alignedVarDecl(node).ast.mut_token + 1, - // function declaration names .fn_proto, .fn_proto_multi, @@ -324,8 +323,8 @@ fn resolveVarDeclAliasInternal( .other => |n| n, else => return null, }; - - if (resolved_node >= node_tags.len or !isContainer(node_tags[resolved_node])) return null; + // The alias target must resolve to a container declaration. + if (!isContainer(resolved.handle.tree, resolved_node)) return null; break :block NodeWithHandle{ .node = resolved_node, .handle = resolved.handle }; } else return null; @@ -1117,9 +1116,9 @@ pub const TypeWithHandle = struct { const tree = self.handle.tree; const node = self.type.data.other; const tags = tree.nodes.items(.tag); - if (isContainer(tags[node])) { + if (isContainer(tree, node)) { var buf: [2]ast.Node.Index = undefined; - for (declMembers(tree, tags[node], node, &buf)) |child| { + for (declMembers(tree, node, &buf)) |child| { if (tags[child].isContainerField()) return false; } } @@ -1478,8 +1477,8 @@ pub fn fnProto(tree: ast.Tree, node: ast.Node.Index, buf: 
*[1]ast.Node.Index) ?a pub fn getImportStr(tree: ast.Tree, node: ast.Node.Index, source_index: usize) ?[]const u8 { const node_tags = tree.nodes.items(.tag); var buf: [2]ast.Node.Index = undefined; - if (isContainer(node_tags[node])) { - const decls = declMembers(tree, node_tags[node], node, &buf); + if (isContainer(tree, node)) { + const decls = declMembers(tree, node, &buf); for (decls) |decl_idx| { if (getImportStr(tree, decl_idx, source_index)) |name| { return name; @@ -1806,7 +1805,7 @@ fn addOutlineNodes(allocator: *std.mem.Allocator, tree: ast.Tree, child: ast.Nod .tagged_union_two_trailing, => { var buf: [2]ast.Node.Index = undefined; - for (declMembers(tree, tree.nodes.items(.tag)[child], child, &buf)) |member| + for (declMembers(tree, child, &buf)) |member| try addOutlineNodes(allocator, tree, member, context); return; }, @@ -1894,9 +1893,9 @@ fn getDocumentSymbolsInternal(allocator: *std.mem.Allocator, tree: ast.Tree, nod .encoding = context.encoding, }; - if (isContainer(tags[node])) { + if (isContainer(tree, node)) { var buf: [2]ast.Node.Index = undefined; - for (declMembers(tree, tags[node], node, &buf)) |child| + for (declMembers(tree, node, &buf)) |child| try addOutlineNodes(allocator, tree, child, &child_context); } @@ -2091,7 +2090,7 @@ fn findContainerScope(container_handle: NodeWithHandle) ?*Scope { const container = container_handle.node; const handle = container_handle.handle; - if (!isContainer(handle.tree.nodes.items(.tag)[container])) return null; + if (!isContainer(handle.tree, container)) return null; // Find the container scope. 
return for (handle.document_scope.scopes) |*scope| { @@ -2534,8 +2533,8 @@ fn nodeSourceRange(tree: ast.Tree, node: ast.Node.Index) SourceRange { }; } -pub fn isContainer(tag: ast.Node.Tag) bool { - return switch (tag) { +pub fn isContainer(tree: ast.Tree, node: ast.Node.Index) bool { + return switch (tree.nodes.items(.tag)[node]) { .container_decl, .container_decl_trailing, .container_decl_arg, @@ -2557,9 +2556,9 @@ pub fn isContainer(tag: ast.Node.Tag) bool { /// Returns the member indices of a given declaration container. /// Asserts given `tag` is a container node -pub fn declMembers(tree: ast.Tree, tag: ast.Node.Tag, node_idx: ast.Node.Index, buffer: *[2]ast.Node.Index) []const ast.Node.Index { - std.debug.assert(isContainer(tag)); - return switch (tag) { +pub fn declMembers(tree: ast.Tree, node_idx: ast.Node.Index, buffer: *[2]ast.Node.Index) []const ast.Node.Index { + std.debug.assert(isContainer(tree, node_idx)); + return switch (tree.nodes.items(.tag)[node_idx]) { .container_decl, .container_decl_trailing => tree.containerDecl(node_idx).ast.members, .container_decl_arg, .container_decl_arg_trailing => tree.containerDeclArg(node_idx).ast.members, .container_decl_two, .container_decl_two_trailing => tree.containerDeclTwo(buffer, node_idx).ast.members, @@ -2598,11 +2597,11 @@ fn makeScopeInternal( const token_tags = tree.tokens.items(.tag); const data = tree.nodes.items(.data); const main_tokens = tree.nodes.items(.main_token); - const node = tags[node_idx]; + const node_tag = tags[node_idx]; - if (isContainer(node)) { + if (isContainer(tree, node_idx)) { var buf: [2]ast.Node.Index = undefined; - const ast_decls = declMembers(tree, node, node_idx, &buf); + const ast_decls = declMembers(tree,node_idx, &buf); (try scopes.addOne(allocator)).* = .{ .range = nodeSourceRange(tree, node_idx), @@ -2635,7 +2634,7 @@ fn makeScopeInternal( continue; } - if (node == .error_set_decl) { + if (node_tag == .error_set_decl) { (try error_completions.addOne(allocator)).* = .{ 
.label = name, .kind = .Constant, @@ -2655,11 +2654,11 @@ fn makeScopeInternal( if (container_field) |field| { const empty_field = field.ast.type_expr == 0 and field.ast.value_expr == 0; - if (empty_field and node == .root) { + if (empty_field and node_tag == .root) { continue; } - const container_decl: ?ast.full.ContainerDecl = switch (node) { + const container_decl: ?ast.full.ContainerDecl = switch (node_tag) { .container_decl, .container_decl_trailing => tree.containerDecl(node_idx), .container_decl_arg, .container_decl_arg_trailing => tree.containerDeclArg(node_idx), .container_decl_two, .container_decl_two_trailing => blk: { @@ -2704,7 +2703,7 @@ fn makeScopeInternal( return; } - switch (node) { + switch (node_tag) { .fn_proto, .fn_proto_one, .fn_proto_simple, @@ -2782,7 +2781,7 @@ fn makeScopeInternal( uses.deinit(); } - const statements: []const ast.Node.Index = switch (node) { + const statements: []const ast.Node.Index = switch (node_tag) { .block, .block_semicolon => tree.extra_data[data[node_idx].lhs..data[node_idx].rhs], .block_two, .block_two_semicolon => blk: { const statements = &[_]ast.Node.Index{ data[node_idx].lhs, data[node_idx].rhs }; @@ -2819,7 +2818,7 @@ fn makeScopeInternal( .@"if", .if_simple, => { - const if_node: ast.full.If = if (node == .@"if") + const if_node: ast.full.If = if (node_tag == .@"if") tree.ifFull(node_idx) else tree.ifSimple(node_idx); @@ -2879,8 +2878,8 @@ fn makeScopeInternal( .while_cont, .@"for", .for_simple, - => |tag| { - const while_node: ast.full.While = switch (node) { + => { + const while_node: ast.full.While = switch (node_tag) { .@"while" => tree.whileFull(node_idx), .while_simple => tree.whileSimple(node_idx), .while_cont => tree.whileCont(node_idx), @@ -2889,7 +2888,7 @@ fn makeScopeInternal( else => unreachable, }; - const is_for = tag == .@"for" or tag == .for_simple; + const is_for = node_tag == .@"for" or node_tag == .for_simple; if (while_node.label_token) |label| { std.debug.assert(token_tags[label] == 
.identifier); @@ -3043,7 +3042,7 @@ fn makeScopeInternal( .async_call_one_comma, => { var buf: [1]ast.Node.Index = undefined; - const call: ast.full.Call = switch (node) { + const call: ast.full.Call = switch (node_tag) { .async_call, .async_call_comma, .call, @@ -3071,7 +3070,7 @@ fn makeScopeInternal( .struct_init_one_comma, => { var buf: [2]ast.Node.Index = undefined; - const struct_init: ast.full.StructInit = switch (node) { + const struct_init: ast.full.StructInit = switch (node_tag) { .struct_init, .struct_init_comma => tree.structInit(node_idx), .struct_init_dot, .struct_init_dot_comma => tree.structInitDot(node_idx), .struct_init_dot_two, .struct_init_dot_two_comma => tree.structInitDotTwo(&buf, node_idx), @@ -3096,7 +3095,7 @@ fn makeScopeInternal( .array_init_one_comma, => { var buf: [2]ast.Node.Index = undefined; - const array_init: ast.full.ArrayInit = switch (node) { + const array_init: ast.full.ArrayInit = switch (node_tag) { .array_init, .array_init_comma => tree.arrayInit(node_idx), .array_init_dot, .array_init_dot_comma => tree.arrayInitDot(node_idx), .array_init_dot_two, .array_init_dot_two_comma => tree.arrayInitDotTwo(&buf, node_idx), @@ -3129,7 +3128,7 @@ fn makeScopeInternal( .builtin_call_two_comma, => { const b_data = data[node_idx]; - const params = switch (node) { + const params = switch (node_tag) { .builtin_call, .builtin_call_comma => tree.extra_data[b_data.lhs..b_data.rhs], .builtin_call_two, .builtin_call_two_comma => if (b_data.lhs == 0) &[_]ast.Node.Index{} @@ -3161,7 +3160,7 @@ fn makeScopeInternal( .slice_open, .slice_sentinel, => { - const slice: ast.full.Slice = switch (node) { + const slice: ast.full.Slice = switch (node_tag) { .slice => tree.slice(node_idx), .slice_open => tree.sliceOpen(node_idx), .slice_sentinel => tree.sliceSentinel(node_idx), diff --git a/src/main.zig b/src/main.zig index 86d2026..45bbea7 100644 --- a/src/main.zig +++ b/src/main.zig @@ -359,7 +359,7 @@ fn nodeToCompletion( else null; - if 
(analysis.isContainer(node_tags[node])) { + if (analysis.isContainer(handle.tree, node)) { const context = DeclToCompletionContext{ .completions = list, .config = &config, diff --git a/src/offsets.zig b/src/offsets.zig index 64a3b89..54d9488 100644 --- a/src/offsets.zig +++ b/src/offsets.zig @@ -54,6 +54,30 @@ pub fn documentPosition(doc: types.TextDocument, position: types.Position, encod } } +pub fn lineSectionLength(tree: ast.Tree, start_index: usize, end_index: usize, encoding: Encoding) !usize { + const source = tree.source[start_index..]; + std.debug.assert(end_index >= start_index and source.len >= end_index - start_index); + if (encoding == .utf8) { + return end_index - start_index; + } + + var result: usize = 0; + var i: usize = 0; + while (i + start_index < end_index) { + std.debug.assert(source[i] != '\n'); + + const n = try std.unicode.utf8ByteSequenceLength(source[i]); + if (i + n > source.len) + return error.CodepointTooLong; + + const codepoint = try std.unicode.utf8Decode(source[i .. 
i + n]); + + result += 1 + @as(usize, @boolToInt(codepoint >= 0x10000)); + i += n; + } + return result; +} + pub const TokenLocation = struct { line: usize, column: usize, @@ -71,15 +95,14 @@ pub const TokenLocation = struct { } }; -pub fn tokenRelativeLocation(tree: ast.Tree, start_index: usize, next_token_index: usize, encoding: Encoding) !TokenLocation { - const start = next_token_index; - +pub fn tokenRelativeLocation(tree: ast.Tree, start_index: usize, token_start: usize, encoding: Encoding) !TokenLocation { + std.debug.assert(token_start >= start_index); var loc = TokenLocation{ .line = 0, .column = 0, .offset = 0, }; - const token_start = start; + const source = tree.source[start_index..]; var i: usize = 0; while (i + start_index < token_start) { @@ -91,12 +114,11 @@ pub fn tokenRelativeLocation(tree: ast.Tree, start_index: usize, next_token_inde } else { if (encoding == .utf16) { const n = try std.unicode.utf8ByteSequenceLength(c); + if (i + n >= source.len) + return error.CodepointTooLong; + const codepoint = try std.unicode.utf8Decode(source[i .. 
i + n]); - if (codepoint < 0x10000) { - loc.column += 1; - } else { - loc.column += 2; - } + loc.column += 1 + @as(usize, @boolToInt(codepoint >= 0x10000)); i += n; } else { loc.column += 1; diff --git a/src/references.zig b/src/references.zig index a10c4f3..37ad26b 100644 --- a/src/references.zig +++ b/src/references.zig @@ -119,7 +119,7 @@ fn symbolReferencesInternal( .error_set_decl, => { var buf: [2]ast.Node.Index = undefined; - for (analysis.declMembers(tree, node_tags[node], node, &buf)) |member| + for (analysis.declMembers(tree, node, &buf)) |member| try symbolReferencesInternal(arena, store, .{ .node = member, .handle = handle }, decl, encoding, context, handler); }, .global_var_decl, diff --git a/src/semantic_tokens.zig b/src/semantic_tokens.zig index 679626b..cda01d2 100644 --- a/src/semantic_tokens.zig +++ b/src/semantic_tokens.zig @@ -75,6 +75,36 @@ const Builder = struct { }; } + fn highlightComment( + self: *Builder, + prev_end: usize, + comment_start: usize, + comment_end: usize, + token_modifiers: TokenModifiers, + ) !void { + const comment_delta = offsets.tokenRelativeLocation( + self.handle.tree, + prev_end, + comment_start, + self.encoding, + ) catch return; + + const comment_length = offsets.lineSectionLength( + self.handle.tree, + comment_start, + comment_end, + self.encoding, + ) catch return; + + try self.arr.appendSlice(&.{ + @truncate(u32, comment_delta.line), + @truncate(u32, comment_delta.column), + @truncate(u32, comment_length), + @enumToInt(TokenType.comment), + token_modifiers.toInt(), + }); + } + fn add(self: *Builder, token: ast.TokenIndex, token_type: TokenType, token_modifiers: TokenModifiers) !void { const starts = self.handle.tree.tokens.items(.start); var start_idx = if (self.current_token) |current_token| @@ -85,24 +115,67 @@ const Builder = struct { if (start_idx > starts[token]) return; - const delta_loc = while (self.findCommentBetween(start_idx, starts[token])) |comment| { - const old_loc = 
self.handle.tree.tokenLocation(0, self.current_token orelse 0); - const comment_delta = offsets.tokenRelativeLocation(self.handle.tree, start_idx, comment.start, self.encoding) catch return; + var comments_end: usize = start_idx; + var comments_start: usize = start_idx; + // Highlight comments in the gap + { + const source = self.handle.tree.source; + var state: enum { none, comment, doc_comment, comment_start } = .none; + var prev_byte = source[start_idx]; + var i: usize = start_idx + 1; + while (i < starts[token]) : ({ + prev_byte = source[i]; + i += 1; + }) { + if (prev_byte == '/' and source[i] == '/') { + switch (state) { + .none => { + comments_start = i - 1; + state = .comment_start; + }, + .comment_start => state = .doc_comment, + else => {}, + } + continue; + } else if (prev_byte == '/' and source[i] == '!' and state == .comment_start) { + state = .doc_comment; + continue; + } - try self.arr.appendSlice(&[_]u32{ - @truncate(u32, comment_delta.line), - @truncate(u32, comment_delta.column), - comment.length, - @enumToInt(TokenType.comment), - 0, - }); + if (source[i] == '\n' and state != .none) { + try self.highlightComment(comments_end, comments_start, i, switch (state) { + .comment, .comment_start => .{}, + .doc_comment => .{ .documentation = true }, + else => unreachable, + }); + comments_end = i; + state = .none; + } else if (state == .comment_start) { + state = .comment; + } + } + if (state != .none) { + try self.highlightComment(comments_end, comments_start, i, switch (state) { + .comment, .comment_start => .{}, + .doc_comment => .{ .documentation = true }, + else => unreachable, + }); + // comments_end is deliberately left unchanged here: the next + // token's delta below is computed relative to comments_start. + } + } - start_idx = comment.start; - } else offsets.tokenRelativeLocation(self.handle.tree, start_idx, starts[token], self.encoding) catch return; + + const delta = offsets.tokenRelativeLocation( 
self.handle.tree, + comments_start, + starts[token], + self.encoding, + ) catch return; - try self.arr.appendSlice(&[_]u32{ - @truncate(u32, delta_loc.line), - @truncate(u32, delta_loc.column), + try self.arr.appendSlice(&.{ + @truncate(u32, delta.line), + @truncate(u32, delta.column), @truncate(u32, offsets.tokenLength(self.handle.tree, token, self.encoding)), @enumToInt(token_type), token_modifiers.toInt(), @@ -113,15 +186,6 @@ const Builder = struct { fn toOwnedSlice(self: *Builder) []u32 { return self.arr.toOwnedSlice(); } - - /// Based on a given start and end index, returns a `Comment` between the positions - /// Returns `null` if none was fone - fn findCommentBetween(self: Builder, from: u32, to: u32) ?Comment { - return for (self.comments.items) |comment| { - if (comment.start > from and comment.start < to) - break comment; - } else null; - } }; fn writeToken( @@ -710,7 +774,7 @@ fn writeNodeTokens( .node = struct_init.ast.type_expr, .handle = handle, })) |struct_type| switch (struct_type.type.data) { - .other => |type_node| if (analysis.isContainer(struct_type.handle.tree.nodes.items(.tag)[type_node])) + .other => |type_node| if (analysis.isContainer(struct_type.handle.tree, type_node)) fieldTokenType(type_node, struct_type.handle) else null, @@ -976,7 +1040,7 @@ fn writeNodeTokens( switch (decl_type.decl.*) { .ast_node => |decl_node| { if (decl_type.handle.tree.nodes.items(.tag)[decl_node].isContainerField()) { - const tok_type: ?TokenType = if (analysis.isContainer(lhs_type.handle.tree.nodes.items(.tag)[left_type_node])) + const tok_type: ?TokenType = if (analysis.isContainer(lhs_type.handle.tree, left_type_node)) fieldTokenType(decl_node, lhs_type.handle) else if (left_type_node == 0) TokenType.field @@ -1083,14 +1147,11 @@ fn writeNodeTokens( pub fn writeAllSemanticTokens(arena: *std.heap.ArenaAllocator, store: *DocumentStore, handle: *DocumentStore.Handle, encoding: offsets.Encoding) ![]u32 { var builder = Builder.init(arena.child_allocator, handle, 
encoding); - // as line comments are not nodes, we parse the text then generate the tokens for them - try findComments(&builder, handle.tree.source, encoding); - // reverse the ast from the root declarations var gap_highlighter = GapHighlighter.init(&builder, 0); var buf: [2]ast.Node.Index = undefined; - for (analysis.declMembers(handle.tree, .root, 0, &buf)) |child| { + for (analysis.declMembers(handle.tree, 0, &buf)) |child| { try gap_highlighter.next(child); try writeNodeTokens(&builder, arena, store, child); } @@ -1099,54 +1160,3 @@ pub fn writeAllSemanticTokens(arena: *std.heap.ArenaAllocator, store: *DocumentS return builder.toOwnedSlice(); } - -/// As the AST does not contain nodes for comments -/// this will parse through the entire file to search for comments -/// and generate semantic tokens for them -fn findComments(builder: *Builder, source: []const u8, encoding: offsets.Encoding) !void { - var state: enum { none, comment, doc_comment } = .none; - - var prev: u8 = 0; - var start: usize = 0; - for (source) |c, i| { - if (state == .comment and c == '/') { - state = .none; - continue; - } - - if (state == .none and c == '/' and prev == '/') { - state = .comment; - start = i - 1; - } - - if (c == '\n') { - if (state == .comment) { - state = .none; - - const len = if (encoding == .utf8) - i - start - else blk: { - var index: usize = start; - var utf16_len: usize = 0; - while (index < i) { - const n = std.unicode.utf8ByteSequenceLength(source[index]) catch unreachable; - const codepoint = std.unicode.utf8Decode(source[index .. index + n]) catch unreachable; - if (codepoint < 0x10000) { - utf16_len += 1; - } else { - utf16_len += 2; - } - index += n; - } - break :blk utf16_len; - }; - - try builder.comments.append(.{ - .length = @truncate(u32, len), - .start = @truncate(u32, start), - }); - } - } - prev = c; - } -}