From 9bed369797df8ea83986b9528f511663fd3615c3 Mon Sep 17 00:00:00 2001
From: Alexandros Naskos
Date: Fri, 3 Jul 2020 11:34:42 +0300
Subject: [PATCH] Correctly handle utf16 offsets for semantic tokens

---
 src/main.zig            | 11 ++++----
 src/offsets.zig         | 74 +++++++++++++++++++++++++++++++++++++++++
 src/semantic_tokens.zig | 16 +++++-----
 3 files changed, 89 insertions(+), 12 deletions(-)

diff --git a/src/main.zig b/src/main.zig
index caf3141..6fcf2b5 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -86,6 +86,7 @@ const ClientCapabilities = struct {
 };
 
 var client_capabilities = ClientCapabilities{};
+var offset_encoding = offsets.Encoding.utf16;
 
 const initialize_response =
     \\,"result": {"capabilities": {"signatureHelpProvider": {"triggerCharacters": ["(",","]},"textDocumentSync": 1,"renameProvider":true,"completionProvider": {"resolveProvider": false,"triggerCharacters": [".",":","@"]},"documentHighlightProvider": false,"hoverProvider": true,"codeActionProvider": false,"declarationProvider": true,"definitionProvider": true,"typeDefinitionProvider": true,"implementationProvider": false,"referencesProvider": false,"documentSymbolProvider": true,"colorProvider": false,"documentFormattingProvider": true,"documentRangeFormattingProvider": false,"foldingRangeProvider": false,"selectionRangeProvider": false,"workspaceSymbolProvider": false,"rangeProvider": false,"documentProvider": true,"workspace": {"workspaceFolders": {"supported": true,"changeNotifications": true}},"semanticTokensProvider": {"documentProvider": true,"legend": {"tokenTypes": ["namespace","type","struct","enum","union","parameter","variable","tagField","field","errorTag","function","keyword","comment","string","number","operator","builtin","label"],"tokenModifiers": ["definition","async","documentation", "generic"]}}}}}
@@ -1067,7 +1068,7 @@ fn semanticTokensHandler(arena: *std.heap.ArenaAllocator, id: types.RequestId, r
     };
 
     const semantic_tokens = @import("semantic_tokens.zig");
-    const token_array = try semantic_tokens.writeAllSemanticTokens(arena, &document_store, handle);
+    const token_array = try semantic_tokens.writeAllSemanticTokens(arena, &document_store, handle, offset_encoding);
     defer allocator.free(token_array);
 
     return try send(arena, types.Response{
@@ -1085,7 +1086,7 @@ fn completionHandler(arena: *std.heap.ArenaAllocator, id: types.RequestId, req:
     };
 
     if (req.params.position.character >= 0) {
-        const doc_position = try offsets.documentPosition(handle.document, req.params.position, .utf16);
+        const doc_position = try offsets.documentPosition(handle.document, req.params.position, offset_encoding);
         const pos_context = try analysis.documentPositionContext(arena, handle.document, doc_position);
 
         const this_config = configFromUriOr(req.params.textDocument.uri, config);
@@ -1142,7 +1143,7 @@ fn gotoHandler(arena: *std.heap.ArenaAllocator, id: types.RequestId, req: reques
     };
 
     if (req.params.position.character >= 0) {
-        const doc_position = try offsets.documentPosition(handle.document, req.params.position, .utf16);
+        const doc_position = try offsets.documentPosition(handle.document, req.params.position, offset_encoding);
         const pos_context = try analysis.documentPositionContext(arena, handle.document, doc_position);
 
         const this_config = configFromUriOr(req.params.textDocument.uri, config);
@@ -1173,7 +1174,7 @@ fn hoverHandler(arena: *std.heap.ArenaAllocator, id: types.RequestId, req: reque
     };
 
     if (req.params.position.character >= 0) {
-        const doc_position = try offsets.documentPosition(handle.document, req.params.position, .utf16);
+        const doc_position = try offsets.documentPosition(handle.document, req.params.position, offset_encoding);
         const pos_context = try analysis.documentPositionContext(arena, handle.document, doc_position);
 
         const this_config = configFromUriOr(req.params.textDocument.uri, config);
@@ -1246,7 +1247,7 @@ fn renameHandler(arena: *std.heap.ArenaAllocator, id: types.RequestId, req: requ
     };
 
     if (req.params.position.character >= 0) {
-        const doc_position = try offsets.documentPosition(handle.document, req.params.position, .utf16);
+        const doc_position = try offsets.documentPosition(handle.document, req.params.position, offset_encoding);
         const pos_context = try analysis.documentPositionContext(arena, handle.document, doc_position);
 
         const this_config = configFromUriOr(req.params.textDocument.uri, config);
diff --git a/src/offsets.zig b/src/offsets.zig
index 8e4759e..6423fc8 100644
--- a/src/offsets.zig
+++ b/src/offsets.zig
@@ -52,3 +52,77 @@ pub fn documentPosition(doc: types.TextDocument, position: types.Position, encod
         return DocumentPosition{ .line = line, .absolute_index = line_start_idx + utf8_idx, .line_index = utf8_idx };
     }
 }
+
+/// A line/column pair, relative to a caller-chosen starting byte index.
+pub const TokenLocation = struct {
+    line: usize,
+    column: usize,
+};
+
+/// Returns where `token` starts relative to byte `start_index` of `tree.source`.
+/// `line` counts the '\n' bytes between the two; `column` restarts at 0 after
+/// each one and is measured in UTF-16 code units for `.utf16`, in bytes for
+/// any other encoding.
+/// Fails if that stretch of source is not valid UTF-8 (checked for `.utf16`
+/// only). `start_index` must not be greater than the token's start offset.
+pub fn tokenRelativeLocation(tree: *std.zig.ast.Tree, start_index: usize, token: std.zig.ast.TokenIndex, encoding: Encoding) !TokenLocation {
+    const token_loc = tree.token_locs[token];
+
+    var loc = TokenLocation{
+        .line = 0,
+        .column = 0,
+    };
+    const token_start = token_loc.start;
+    const source = tree.source[start_index..];
+    var i: usize = 0;
+    while (i < token_start - start_index) {
+        const c = source[i];
+        if (c == '\n') {
+            loc.line += 1;
+            loc.column = 0;
+            i += 1;
+        } else {
+            if (encoding == .utf16) {
+                const n = try std.unicode.utf8ByteSequenceLength(c);
+                // A lead byte close to the end of the file can announce more
+                // bytes than remain; fail instead of slicing out of bounds.
+                if (i + n > source.len) return error.TruncatedInput;
+                const codepoint = try std.unicode.utf8Decode(source[i..i + n]);
+                // Code points beyond the BMP need a surrogate pair in UTF-16.
+                if (codepoint < 0x10000) {
+                    loc.column += 1;
+                } else {
+                    loc.column += 2;
+                }
+                i += n;
+            } else {
+                loc.column += 1;
+                i += 1;
+            }
+        }
+    }
+    return loc;
+}
+
+/// Returns the length of `token`: in bytes for `.utf8`, in UTF-16 code units
+/// for any other encoding.
+/// Asserts the token is comprised of valid utf8
+pub fn tokenLength(tree: *std.zig.ast.Tree, token: std.zig.ast.TokenIndex, encoding: Encoding) usize {
+    const token_loc = tree.token_locs[token];
+    if (encoding == .utf8)
+        return token_loc.end - token_loc.start;
+
+    var i: usize = token_loc.start;
+    var utf16_len: usize = 0;
+    while (i < token_loc.end) {
+        const n = std.unicode.utf8ByteSequenceLength(tree.source[i]) catch unreachable;
+        const codepoint = std.unicode.utf8Decode(tree.source[i..i + n]) catch unreachable;
+        if (codepoint < 0x10000) {
+            utf16_len += 1;
+        } else {
+            utf16_len += 2;
+        }
+        i += n;
+    }
+    return utf16_len;
+}
diff --git a/src/semantic_tokens.zig b/src/semantic_tokens.zig
index 92a493d..b7c3a42 100644
--- a/src/semantic_tokens.zig
+++ b/src/semantic_tokens.zig
@@ -1,4 +1,5 @@
 const std = @import("std");
+const offsets = @import("offsets.zig");
 const DocumentStore = @import("document_store.zig");
 const analysis = @import("analysis.zig");
 const ast = std.zig.ast;
@@ -43,12 +44,14 @@ const Builder = struct {
     handle: *DocumentStore.Handle,
     current_token: ?ast.TokenIndex,
     arr: std.ArrayList(u32),
+    encoding: offsets.Encoding,
 
-    fn init(allocator: *std.mem.Allocator, handle: *DocumentStore.Handle) Builder {
+    fn init(allocator: *std.mem.Allocator, handle: *DocumentStore.Handle, encoding: offsets.Encoding) Builder {
         return Builder{
             .handle = handle,
             .current_token = null,
             .arr = std.ArrayList(u32).init(allocator),
+            .encoding = encoding,
         };
     }
 
@@ -57,13 +60,12 @@ const Builder = struct {
             self.handle.tree.token_locs[current_token].start
         else
             0;
-
-        const token_loc = self.handle.tree.token_locs[token];
-        const delta_loc = self.handle.tree.tokenLocationLoc(start_idx, token_loc);
+        // Skip the token if its location cannot be computed (e.g. invalid UTF-8).
+        const delta_loc = offsets.tokenRelativeLocation(self.handle.tree, start_idx, token, self.encoding) catch return;
         try self.arr.appendSlice(&[_]u32{
             @truncate(u32, delta_loc.line),
             @truncate(u32, delta_loc.column),
-            @truncate(u32, token_loc.end - token_loc.start),
+            @truncate(u32, offsets.tokenLength(self.handle.tree, token, self.encoding)),
             @enumToInt(token_type),
             token_modifiers.toInt(),
         });
@@ -722,8 +724,8 @@ fn writeNodeTokens(builder: *Builder, arena: *std.heap.ArenaAllocator, store: *D
 }
 
 // TODO Range version, edit version.
-pub fn writeAllSemanticTokens(arena: *std.heap.ArenaAllocator, store: *DocumentStore, handle: *DocumentStore.Handle) ![]u32 {
-    var builder = Builder.init(arena.child_allocator, handle);
+pub fn writeAllSemanticTokens(arena: *std.heap.ArenaAllocator, store: *DocumentStore, handle: *DocumentStore.Handle, encoding: offsets.Encoding) ![]u32 {
+    var builder = Builder.init(arena.child_allocator, handle, encoding);
     try writeNodeTokens(&builder, arena, store, &handle.tree.root_node.base);
     return builder.toOwnedSlice();
 }