2020-07-03 00:33:10 +01:00
|
|
|
const std = @import("std");
|
2022-07-15 17:06:18 +01:00
|
|
|
const types = @import("types.zig");
|
2021-10-01 02:44:06 +01:00
|
|
|
const Ast = std.zig.Ast;
|
2020-07-03 00:33:10 +01:00
|
|
|
|
|
|
|
/// Unit in which character offsets and lengths are measured by the
/// functions in this file.
pub const Encoding = enum {
    /// Offsets and lengths count UTF-8 bytes.
    utf8,
    /// Offsets and lengths count UTF-16 code units: code points at or above
    /// 0x10000 count as two units, all others as one.
    utf16,
};
|
|
|
|
|
|
|
|
/// Result of resolving a (line, character) position against a document's text.
pub const DocumentPosition = struct {
    /// Contents of the addressed line, without the "\n" separator.
    line: []const u8,
    /// Byte offset of the position, measured from the start of `line`.
    line_index: usize,
    /// Byte offset of the position, measured from the start of the document text.
    absolute_index: usize,
};
|
|
|
|
|
|
|
|
/// Resolves `position` (line and character) to byte offsets inside `doc.text`.
///
/// Lines are delimited by "\n". With `.utf8`, `position.character` is a byte
/// offset from the start of the line; with `.utf16`, it is a number of UTF-16
/// code units (code points at or above 0x10000 count as two).
///
/// The character offset is only bounded by the end of the document, not by
/// the end of the addressed line, so it may reach into following lines.
///
/// Returns `error.InvalidParams` when the line or character is negative, the
/// line does not exist, or the character offset lies beyond the end of the
/// document (including a multi-byte sequence cut off by the end of the text).
/// In the `.utf16` case, malformed UTF-8 is reported with the errors of
/// `std.unicode.utf8ByteSequenceLength` / `std.unicode.utf8Decode`.
pub fn documentPosition(doc: types.TextDocument, position: types.Position, encoding: Encoding) !DocumentPosition {
    // Negative coordinates can never address anything; reject them up front
    // so the unsigned conversions below cannot trap.
    if (position.line < 0 or position.character < 0) return error.InvalidParams;

    var split_iterator = std.mem.split(u8, doc.text, "\n");

    // Skip every line that precedes the requested one.
    var line_idx: i64 = 0;
    while (line_idx < position.line) : (line_idx += 1) {
        _ = split_iterator.next() orelse return error.InvalidParams;
    }

    // `index` is null once the iterator is exhausted, which happens when
    // `position.line` is one past the last line of the document.
    const line_start_idx = split_iterator.index orelse return error.InvalidParams;
    const line = split_iterator.next() orelse return error.InvalidParams;

    if (encoding == .utf8) {
        // Compare against the remaining byte count instead of adding first,
        // so a huge `character` cannot overflow the addition.
        const remaining = doc.text.len - line_start_idx;
        if (position.character > @intCast(i64, remaining)) return error.InvalidParams;

        const character = @intCast(usize, position.character);
        return DocumentPosition{
            .line = line,
            .absolute_index = line_start_idx + character,
            .line_index = character,
        };
    }

    // UTF-16: walk code points from the line start, counting code units
    // until the requested character offset is reached.
    const utf8 = doc.text[line_start_idx..];
    var utf8_idx: usize = 0;
    var utf16_idx: usize = 0;
    while (utf16_idx < position.character) {
        // Ran out of text before reaching the requested offset.
        if (utf8_idx >= utf8.len) return error.InvalidParams;

        const n = try std.unicode.utf8ByteSequenceLength(utf8[utf8_idx]);
        const next_utf8_idx = utf8_idx + n;
        // The sequence announced by the lead byte is cut off by the end of the text.
        if (next_utf8_idx > utf8.len) return error.InvalidParams;

        const codepoint = try std.unicode.utf8Decode(utf8[utf8_idx..next_utf8_idx]);
        // Code points outside the BMP occupy a surrogate pair in UTF-16.
        utf16_idx += 1 + @as(usize, @boolToInt(codepoint >= 0x10000));
        utf8_idx = next_utf8_idx;
    }

    return DocumentPosition{
        .line = line,
        .absolute_index = line_start_idx + utf8_idx,
        .line_index = utf8_idx,
    };
}
|
2020-07-03 09:34:42 +01:00
|
|
|
|
2021-10-01 02:57:45 +01:00
|
|
|
/// Returns the length of `tree.source[start_index..end_index]` measured in
/// the units of `encoding`.
///
/// With `.utf8` this is the byte count. With `.utf16` it is the number of
/// UTF-16 code units (code points at or above 0x10000 count as two).
///
/// Asserts that `start_index <= end_index`, that the section lies within the
/// source, and (in the `.utf16` case) that the section contains no "\n".
///
/// Returns `error.CodepointTooLong` when a multi-byte sequence runs past the
/// end of the source, and the `std.unicode` decoding errors on malformed UTF-8.
pub fn lineSectionLength(tree: Ast, start_index: usize, end_index: usize, encoding: Encoding) !usize {
    const source = tree.source[start_index..];
    std.debug.assert(end_index >= start_index and source.len >= end_index - start_index);

    const section_len = end_index - start_index;
    if (encoding == .utf8) return section_len;

    var result: usize = 0;
    var i: usize = 0;
    while (i < section_len) {
        std.debug.assert(source[i] != '\n');

        const n = try std.unicode.utf8ByteSequenceLength(source[i]);
        // A sequence that ends exactly at the end of the source is complete;
        // only one that extends beyond it is truncated.
        if (i + n > source.len)
            return error.CodepointTooLong;

        const codepoint = try std.unicode.utf8Decode(source[i .. i + n]);

        // Code points outside the BMP occupy a surrogate pair in UTF-16.
        result += 1 + @as(usize, @boolToInt(codepoint >= 0x10000));
        i += n;
    }
    return result;
}
|
|
|
|
|
2020-07-03 09:34:42 +01:00
|
|
|
/// Line/column coordinates of a token together with a byte offset.
pub const TokenLocation = struct {
    /// Number of lines.
    line: usize,
    /// Column within the line.
    column: usize,
    /// Byte offset associated with this location.
    offset: usize,

    /// Combines `lhs` with a location `rhs` that is relative to it.
    ///
    /// Lines are summed. The columns are summed only when `rhs` stays on the
    /// same line (`rhs.line == 0`); otherwise the column of `rhs` is used
    /// as-is. The offset of the result is always `rhs.offset`.
    pub fn add(lhs: TokenLocation, rhs: TokenLocation) TokenLocation {
        var combined_column = rhs.column;
        if (rhs.line == 0) {
            combined_column = lhs.column + rhs.column;
        }

        return TokenLocation{
            .line = lhs.line + rhs.line,
            .column = combined_column,
            .offset = rhs.offset,
        };
    }
};
|
|
|
|
|
2021-10-01 02:57:45 +01:00
|
|
|
/// Computes the line/column of byte offset `token_start`, relative to byte
/// offset `start_index`, by scanning `tree.source` between the two.
///
/// Every "\n" increments the line and resets the column. With `.utf8` the
/// column counts bytes; with `.utf16` it counts UTF-16 code units (code
/// points at or above 0x10000 count as two). The returned `offset` is the
/// absolute byte offset at which scanning stopped.
///
/// Returns `error.InvalidParams` when `token_start` precedes `start_index` or
/// lies beyond the end of the source, `error.CodepointTooLong` when a
/// multi-byte sequence runs past the end of the source, and the
/// `std.unicode` decoding errors on malformed UTF-8 (`.utf16` only).
pub fn tokenRelativeLocation(tree: Ast, start_index: usize, token_start: usize, encoding: Encoding) !TokenLocation {
    if (token_start < start_index)
        return error.InvalidParams;
    // Scanning reads every byte before `token_start`, so it must not exceed the source.
    if (token_start > tree.source.len)
        return error.InvalidParams;

    var loc = TokenLocation{
        .line = 0,
        .column = 0,
        .offset = 0,
    };

    const source = tree.source[start_index..];
    var i: usize = 0;
    while (i + start_index < token_start) {
        const c = source[i];
        if (c == '\n') {
            loc.line += 1;
            loc.column = 0;
            i += 1;
            continue;
        }

        if (encoding == .utf16) {
            const n = try std.unicode.utf8ByteSequenceLength(c);
            // A sequence that ends exactly at the end of the source is
            // complete; only one that extends beyond it is truncated.
            if (i + n > source.len)
                return error.CodepointTooLong;

            const codepoint = try std.unicode.utf8Decode(source[i .. i + n]);
            // Code points outside the BMP occupy a surrogate pair in UTF-16.
            loc.column += 1 + @as(usize, @boolToInt(codepoint >= 0x10000));
            i += n;
        } else {
            loc.column += 1;
            i += 1;
        }
    }

    loc.offset = i + start_index;
    return loc;
}
|
|
|
|
|
|
|
|
/// Returns the length of the token at index `token`, measured in the units
/// of `encoding`.
/// Asserts the token is comprised of valid utf8
pub fn tokenLength(tree: Ast, token: Ast.TokenIndex, encoding: Encoding) usize {
    const token_loc = tokenLocation(tree, token);
    return locationLength(token_loc, tree, encoding);
}
|
|
|
|
|
|
|
|
/// Token location inside source, as a pair of byte offsets.
pub const Loc = struct {
    /// Byte offset at which the token begins.
    start: usize,
    /// Byte offset at which the token ends.
    end: usize,
};
|
|
|
|
|
|
|
|
/// Returns the length of `tree.source[loc.start..loc.end]` measured in the
/// units of `encoding`: bytes for `.utf8`, UTF-16 code units otherwise.
///
/// In the UTF-16 case the bytes are decoded as UTF-8 and any decoding
/// failure is treated as unreachable, so the range must hold valid UTF-8.
pub fn locationLength(loc: Loc, tree: Ast, encoding: Encoding) usize {
    if (encoding == .utf8) {
        return loc.end - loc.start;
    }

    var code_units: usize = 0;
    var byte_idx: usize = loc.start;
    while (byte_idx < loc.end) {
        const seq_len = std.unicode.utf8ByteSequenceLength(tree.source[byte_idx]) catch unreachable;
        const codepoint = std.unicode.utf8Decode(tree.source[byte_idx .. byte_idx + seq_len]) catch unreachable;
        // One code unit inside the BMP, two (a surrogate pair) outside of it.
        code_units += 1 + @as(usize, @boolToInt(codepoint >= 0x10000));
        byte_idx += seq_len;
    }
    return code_units;
}
|
2020-07-03 10:39:58 +01:00
|
|
|
|
2021-10-01 02:57:45 +01:00
|
|
|
/// Returns the byte range of the token at `token_index`.
///
/// The tree only provides each token's start offset and tag here, so the end
/// is found by re-running the tokenizer from the token's start. If the
/// re-tokenized tag differs from the one recorded in the tree, the empty
/// range `{ 0, 0 }` is returned.
pub fn tokenLocation(tree: Ast, token_index: Ast.TokenIndex) Loc {
    const token_start = tree.tokens.items(.start)[token_index];
    const expected_tag = tree.tokens.items(.tag)[token_index];

    // For some tokens, re-tokenization is needed to find the end.
    var tokenizer = std.zig.Tokenizer{
        .buffer = tree.source,
        .index = token_start,
        .pending_invalid_token = null,
    };
    const retokenized = tokenizer.next();

    // HACK: a tag mismatch should be reported as error.UnexpectedToken
    // (this used to be an assertion); an empty range is returned instead.
    if (retokenized.tag == expected_tag) {
        return Loc{ .start = retokenized.loc.start, .end = retokenized.loc.end };
    }
    return Loc{ .start = 0, .end = 0 };
}
|
|
|
|
|
2022-09-05 21:49:50 +01:00
|
|
|
/// Returns the range of the given token at `token_index`.
///
/// The start is the token's line/column counted from the beginning of the
/// source; the end lies on the same line, `tokenLength` units further.
/// Errors from `tokenRelativeLocation` are propagated.
pub fn tokenToRange(tree: Ast, token_index: Ast.TokenIndex, encoding: Encoding) !types.Range {
    const start_byte = tree.tokens.items(.start)[token_index];
    const loc = try tokenRelativeLocation(tree, 0, start_byte, encoding);
    const length = tokenLength(tree, token_index, encoding);

    const line = @intCast(i64, loc.line);
    const start_character = @intCast(i64, loc.column);
    const end_character = @intCast(i64, loc.column + length);

    return types.Range{
        .start = .{ .line = line, .character = start_character },
        .end = .{ .line = line, .character = end_character },
    };
}
|
|
|
|
|
|
|
|
/// Returns the range of a token pointed to by `position`.
///
/// `position` is resolved to a byte offset with `documentPosition`, and the
/// tokenizer is run once from that offset. The resulting range starts at
/// `position` itself and ends on the same line, the token's length (in units
/// of `encoding`) further. Errors from `documentPosition` are propagated.
pub fn tokenPositionToRange(tree: Ast, position: types.Position, encoding: Encoding) !types.Range {
    // Only `text` is meaningful here; the other fields are left undefined.
    const doc = .{
        .uri = undefined,
        .text = tree.source,
        .mem = undefined,
    };
    const resolved = try documentPosition(doc, position, encoding);

    var tokenizer = std.zig.Tokenizer{
        .buffer = tree.source,
        .index = resolved.absolute_index,
        .pending_invalid_token = null,
    };
    const token = tokenizer.next();

    const token_loc = Loc{ .start = token.loc.start, .end = token.loc.end };
    const token_len = locationLength(token_loc, tree, encoding);
    const end_character = position.character + @intCast(i64, token_len);

    return types.Range{
        .start = position,
        .end = .{ .line = position.line, .character = end_character },
    };
}
|
|
|
|
|
2020-07-03 10:39:58 +01:00
|
|
|
/// Returns the range that spans the whole of `doc.text`.
///
/// The range starts at line 0, character 0 and ends at the end of the last
/// "\n"-delimited line. The end character is that line's length in units of
/// `encoding`: bytes for `.utf8`, UTF-16 code units for `.utf16` (code points
/// at or above 0x10000 count as two).
///
/// In the `.utf16` case, returns `error.CodepointTooLong` when the last line
/// ends in a truncated multi-byte sequence, and the `std.unicode` decoding
/// errors on otherwise malformed UTF-8.
pub fn documentRange(doc: types.TextDocument, encoding: Encoding) !types.Range {
    var split_iterator = std.mem.split(u8, doc.text, "\n");

    // Find the last line and its zero-based index.
    var last_line: []const u8 = split_iterator.next() orelse doc.text;
    var last_line_idx: i64 = 0;
    while (split_iterator.next()) |line| {
        last_line = line;
        last_line_idx += 1;
    }

    const end_character: usize = switch (encoding) {
        .utf8 => last_line.len,
        .utf16 => blk: {
            var utf16_len: usize = 0;
            var i: usize = 0;
            while (i < last_line.len) {
                const n = try std.unicode.utf8ByteSequenceLength(last_line[i]);
                // The lead byte announces more bytes than the line has left.
                if (i + n > last_line.len)
                    return error.CodepointTooLong;

                const codepoint = try std.unicode.utf8Decode(last_line[i .. i + n]);
                // Code points outside the BMP occupy a surrogate pair in UTF-16.
                utf16_len += 1 + @as(usize, @boolToInt(codepoint >= 0x10000));
                i += n;
            }
            break :blk utf16_len;
        },
    };

    return types.Range{
        .start = .{
            .line = 0,
            .character = 0,
        },
        .end = .{
            .line = last_line_idx,
            .character = @intCast(i64, end_character),
        },
    };
}
|