const std = @import("std");
const types = @import("types.zig");
const ast = std.zig.ast;

/// Unit in which the `character` component of a position is counted:
/// bytes for `.utf8`, UTF-16 code units for `.utf16`.
pub const Encoding = enum {
    utf8,
    utf16,
};

/// A position resolved against the bytes of a document.
pub const DocumentPosition = struct {
    /// Text of the line the position is on (without the trailing '\n').
    line: []const u8,
    /// Byte offset of the position, counted from the start of the line.
    line_index: usize,
    /// Byte offset of the position, counted from the start of the document text.
    absolute_index: usize,
};

/// Resolves `position` (line + character, with `character` interpreted according
/// to `encoding`) to byte offsets inside `doc.text`.
///
/// Returns `error.InvalidParams` when the line does not exist or the character
/// lies outside the document. In the utf16 case, errors from `std.unicode` are
/// propagated when the text is not valid UTF-8.
pub fn documentPosition(doc: types.TextDocument, position: types.Position, encoding: Encoding) !DocumentPosition {
    var split_iterator = std.mem.split(doc.text, "\n");

    // Skip all lines that precede the requested one.
    var line_idx: i64 = 0;
    while (line_idx < position.line) : (line_idx += 1) {
        _ = split_iterator.next() orelse return error.InvalidParams;
    }

    // `index` is null once the iterator has consumed the last line, which happens
    // when `position.line` is one past the last line. Report it instead of
    // unwrapping with `.?`, which would panic.
    const line_start_idx = split_iterator.index orelse return error.InvalidParams;
    const line = split_iterator.next() orelse return error.InvalidParams;

    if (encoding == .utf8) {
        // Reject negative characters before converting to usize, and anything
        // that would point past the end of the document.
        const remaining = doc.text.len - line_start_idx;
        if (position.character < 0 or position.character > @intCast(i64, remaining)) {
            return error.InvalidParams;
        }
        const column = @intCast(usize, position.character);
        return DocumentPosition{
            .line = line,
            .absolute_index = line_start_idx + column,
            .line_index = column,
        };
    } else {
        const utf8 = doc.text[line_start_idx..];
        var utf8_idx: usize = 0;
        var utf16_idx: usize = 0;
        while (utf16_idx < position.character) {
            // `>=`: `utf8[utf8.len]` is already out of bounds.
            if (utf8_idx >= utf8.len) {
                return error.InvalidParams;
            }

            const n = try std.unicode.utf8ByteSequenceLength(utf8[utf8_idx]);
            const next_utf8_idx = utf8_idx + n;
            // A truncated multi-byte sequence at the end of the text must not be sliced.
            if (next_utf8_idx > utf8.len) {
                return error.InvalidParams;
            }
            const codepoint = try std.unicode.utf8Decode(utf8[utf8_idx..next_utf8_idx]);
            // Codepoints at or above 0x10000 take two UTF-16 code units.
            if (codepoint < 0x10000) {
                utf16_idx += 1;
            } else {
                utf16_idx += 2;
            }
            utf8_idx = next_utf8_idx;
        }
        return DocumentPosition{
            .line = line,
            .absolute_index = line_start_idx + utf8_idx,
            .line_index = utf8_idx,
        };
    }
}

/// Line/column/offset triple produced by `tokenRelativeLocation`.
pub const TokenLocation = struct {
    line: usize,
    column: usize,
    offset: usize,

    /// Combines `lhs` with `rhs`: lines are summed, the column is summed only
    /// when `rhs.line` is 0 (otherwise `rhs.column` is used as is), and the
    /// offset is taken from `rhs`.
    pub fn add(lhs: TokenLocation, rhs: TokenLocation) TokenLocation {
        return .{
            .line = lhs.line + rhs.line,
            .column = if (rhs.line == 0)
                lhs.column + rhs.column
            else
                rhs.column,
            .offset = rhs.offset,
        };
    }
};

/// Walks `tree.source` from byte offset `start_index` up to byte offset
/// `next_token_index` and returns how many lines were crossed, the column
/// reached on the final line (counted in bytes for `.utf8`, in UTF-16 code
/// units for `.utf16`) and the byte offset at which the walk stopped.
///
/// In the utf16 case, errors from `std.unicode` are propagated when the source
/// is not valid UTF-8.
pub fn tokenRelativeLocation(tree: ast.Tree, start_index: usize, next_token_index: usize, encoding: Encoding) !TokenLocation {
    var loc = TokenLocation{
        .line = 0,
        .column = 0,
        .offset = 0,
    };
    const source = tree.source[start_index..];
    var i: usize = 0;
    while (i + start_index < next_token_index) {
        const c = source[i];
        if (c == '\n') {
            loc.line += 1;
            loc.column = 0;
            i += 1;
        } else {
            if (encoding == .utf16) {
                const n = try std.unicode.utf8ByteSequenceLength(c);
                const codepoint = try std.unicode.utf8Decode(source[i .. i + n]);
                // Codepoints at or above 0x10000 take two UTF-16 code units.
                if (codepoint < 0x10000) {
                    loc.column += 1;
                } else {
                    loc.column += 2;
                }
                i += n;
            } else {
                loc.column += 1;
                i += 1;
            }
        }
    }
    loc.offset = i + start_index;
    return loc;
}

/// Returns the length of `token`: in bytes for `.utf8`, in UTF-16 code units
/// for `.utf16`.
/// Asserts the token is comprised of valid utf8
pub fn tokenLength(tree: ast.Tree, token: ast.TokenIndex, encoding: Encoding) usize {
    const token_loc = tokenLocation(tree, token);
    if (encoding == .utf8)
        return token_loc.end - token_loc.start;

    var i: usize = token_loc.start;
    var utf16_len: usize = 0;
    while (i < token_loc.end) {
        const n = std.unicode.utf8ByteSequenceLength(tree.source[i]) catch unreachable;
        const codepoint = std.unicode.utf8Decode(tree.source[i .. i + n]) catch unreachable;
        if (codepoint < 0x10000) {
            utf16_len += 1;
        } else {
            utf16_len += 2;
        }
        i += n;
    }
    return utf16_len;
}

/// Token location inside source
pub const Loc = struct {
    start: usize,
    end: usize,
};

/// Returns the start and end byte offsets of the token at `token_index` by
/// re-running the tokenizer from the token's recorded start.
/// Asserts that the re-tokenized tag matches the tag stored in the tree.
pub fn tokenLocation(tree: ast.Tree, token_index: ast.TokenIndex) Loc {
    const start = tree.tokens.items(.start)[token_index];
    const tag = tree.tokens.items(.tag)[token_index];

    // For some tokens, re-tokenization is needed to find the end.
    var tokenizer: std.zig.Tokenizer = .{
        .buffer = tree.source,
        .index = start,
        .pending_invalid_token = null,
    };

    const token = tokenizer.next();
    std.debug.assert(token.tag == tag);
    return .{ .start = token.loc.start, .end = token.loc.end };
}

/// Returns a range that starts at line 0, character 0 and ends after the last
/// line of `doc.text`, with the end character counted according to `encoding`.
///
/// In the utf16 case, errors from `std.unicode` are propagated when the last
/// line is not valid UTF-8.
pub fn documentRange(doc: types.TextDocument, encoding: Encoding) !types.Range {
    // After the loop `line_idx` holds the number of '\n'-separated segments
    // and `curr_line` holds the last of them.
    var line_idx: i64 = 0;
    var curr_line: []const u8 = doc.text;

    var split_iterator = std.mem.split(doc.text, "\n");
    while (split_iterator.next()) |line| : (line_idx += 1) {
        curr_line = line;
    }

    if (encoding == .utf8) {
        return types.Range{
            .start = .{
                .line = 0,
                .character = 0,
            },
            .end = .{
                .line = line_idx,
                .character = @intCast(i64, curr_line.len),
            },
        };
    } else {
        // Length of the last line in UTF-16 code units.
        var utf16_len: usize = 0;
        var line_utf8_idx: usize = 0;
        while (line_utf8_idx < curr_line.len) {
            const n = try std.unicode.utf8ByteSequenceLength(curr_line[line_utf8_idx]);
            const codepoint = try std.unicode.utf8Decode(curr_line[line_utf8_idx .. line_utf8_idx + n]);
            if (codepoint < 0x10000) {
                utf16_len += 1;
            } else {
                utf16_len += 2;
            }
            line_utf8_idx += n;
        }
        return types.Range{
            .start = .{
                .line = 0,
                .character = 0,
            },
            .end = .{
                .line = line_idx,
                .character = @intCast(i64, utf16_len),
            },
        };
    }
}

// Updated version from std that allows for failures
// by removing the unreachables and returning up to that point
// so that we can always provide information while the user is still typing
/// Returns the index of the last token that belongs to `node`.
///
/// The loop repeatedly descends into the child that ends the node, while
/// `end_offset` accumulates the closing tokens (rparen, rbrace, comma, ...)
/// that follow that child.
pub fn lastToken(tree: ast.Tree, node: ast.Node.Index) ast.TokenIndex {
    const Node = ast.Node;
    const TokenIndex = ast.TokenIndex;
    const tags = tree.nodes.items(.tag);
    const datas = tree.nodes.items(.data);
    const main_tokens = tree.nodes.items(.main_token);
    const token_starts = tree.tokens.items(.start);
    const token_tags = tree.tokens.items(.tag);
    var n = node;
    var end_offset: TokenIndex = 0;
    while (true) switch (tags[n]) {
        .root => return @intCast(TokenIndex, tree.tokens.len - 1),

        .@"usingnamespace",
        .bool_not,
        .negation,
        .bit_not,
        .negation_wrap,
        .address_of,
        .@"try",
        .@"await",
        .optional_type,
        .@"resume",
        .@"nosuspend",
        .@"comptime",
        => n = datas[n].lhs,

        .test_decl,
        .@"errdefer",
        .@"defer",
        .@"catch",
        .equal_equal,
        .bang_equal,
        .less_than,
        .greater_than,
        .less_or_equal,
        .greater_or_equal,
        .assign_mul,
        .assign_div,
        .assign_mod,
        .assign_add,
        .assign_sub,
        .assign_bit_shift_left,
        .assign_bit_shift_right,
        .assign_bit_and,
        .assign_bit_xor,
        .assign_bit_or,
        .assign_mul_wrap,
        .assign_add_wrap,
        .assign_sub_wrap,
        .assign,
        .merge_error_sets,
        .mul,
        .div,
        .mod,
        .array_mult,
        .mul_wrap,
        .add,
        .sub,
        .array_cat,
        .add_wrap,
        .sub_wrap,
        .bit_shift_left,
        .bit_shift_right,
        .bit_and,
        .bit_xor,
        .bit_or,
        .@"orelse",
        .bool_and,
        .bool_or,
        .anyframe_type,
        .error_union,
        .if_simple,
        .while_simple,
        .for_simple,
        .fn_proto_simple,
        .fn_proto_multi,
        .ptr_type_aligned,
        .ptr_type_sentinel,
        .ptr_type,
        .ptr_type_bit_range,
        .array_type,
        .switch_case_one,
        .switch_case,
        .switch_range,
        => n = datas[n].rhs,

        .field_access,
        .unwrap_optional,
        .grouped_expression,
        .multiline_string_literal,
        .error_set_decl,
        .asm_simple,
        .asm_output,
        .asm_input,
        .error_value,
        => return datas[n].rhs + end_offset,

        .@"anytype",
        .anyframe_literal,
        .char_literal,
        .integer_literal,
        .float_literal,
        .false_literal,
        .true_literal,
        .null_literal,
        .undefined_literal,
        .unreachable_literal,
        .identifier,
        .deref,
        .enum_literal,
        .string_literal,
        => return main_tokens[n] + end_offset,

        .@"return" => if (datas[n].lhs != 0) {
            n = datas[n].lhs;
        } else {
            return main_tokens[n] + end_offset;
        },

        .call, .async_call => {
            end_offset += 1; // for the rparen
            const params = tree.extraData(datas[n].rhs, Node.SubRange);
            if (params.end - params.start == 0) {
                return main_tokens[n] + end_offset;
            }
            n = tree.extra_data[params.end - 1]; // last parameter
        },
        .tagged_union_enum_tag => {
            const members = tree.extraData(datas[n].rhs, Node.SubRange);
            if (members.end - members.start == 0) {
                end_offset += 4; // for the rparen + rparen + lbrace + rbrace
                n = datas[n].lhs;
            } else {
                end_offset += 1; // for the rbrace
                n = tree.extra_data[members.end - 1]; // last parameter
            }
        },
        .call_comma,
        .async_call_comma,
        .tagged_union_enum_tag_trailing,
        => {
            end_offset += 2; // for the comma/semicolon + rparen/rbrace
            const params = tree.extraData(datas[n].rhs, Node.SubRange);
            std.debug.assert(params.end > params.start);
            n = tree.extra_data[params.end - 1]; // last parameter
        },
        .@"switch" => {
            const cases = tree.extraData(datas[n].rhs, Node.SubRange);
            if (cases.end - cases.start == 0) {
                end_offset += 3; // rparen, lbrace, rbrace
                n = datas[n].lhs; // condition expression
            } else {
                end_offset += 1; // for the rbrace
                n = tree.extra_data[cases.end - 1]; // last case
            }
        },
        .container_decl_arg => {
            const members = tree.extraData(datas[n].rhs, Node.SubRange);
            if (members.end - members.start == 0) {
                end_offset += 3; // for the rparen + lbrace + rbrace
                n = datas[n].lhs;
            } else {
                end_offset += 1; // for the rbrace
                n = tree.extra_data[members.end - 1]; // last parameter
            }
        },
        .@"asm" => {
            const extra = tree.extraData(datas[n].rhs, Node.Asm);
            return extra.rparen + end_offset;
        },
        .array_init,
        .struct_init,
        => {
            const elements = tree.extraData(datas[n].rhs, Node.SubRange);
            std.debug.assert(elements.end - elements.start > 0);
            end_offset += 1; // for the rbrace
            n = tree.extra_data[elements.end - 1]; // last element
        },
        .array_init_comma,
        .struct_init_comma,
        .container_decl_arg_trailing,
        .switch_comma,
        => {
            const members = tree.extraData(datas[n].rhs, Node.SubRange);
            std.debug.assert(members.end - members.start > 0);
            end_offset += 2; // for the comma + rbrace
            n = tree.extra_data[members.end - 1]; // last parameter
        },
        .array_init_dot,
        .struct_init_dot,
        .block,
        .container_decl,
        .tagged_union,
        .builtin_call,
        => {
            std.debug.assert(datas[n].rhs - datas[n].lhs > 0);
            end_offset += 1; // for the rbrace
            n = tree.extra_data[datas[n].rhs - 1]; // last statement
        },
        .array_init_dot_comma,
        .struct_init_dot_comma,
        .block_semicolon,
        .container_decl_trailing,
        .tagged_union_trailing,
        .builtin_call_comma,
        => {
            std.debug.assert(datas[n].rhs - datas[n].lhs > 0);
            end_offset += 2; // for the comma/semicolon + rbrace/rparen
            n = tree.extra_data[datas[n].rhs - 1]; // last member
        },
        .call_one,
        .async_call_one,
        .array_access,
        => {
            end_offset += 1; // for the rparen/rbracket
            if (datas[n].rhs == 0) {
                return main_tokens[n] + end_offset;
            }
            n = datas[n].rhs;
        },
        .array_init_dot_two,
        .block_two,
        .builtin_call_two,
        .struct_init_dot_two,
        .container_decl_two,
        .tagged_union_two,
        => {
            if (datas[n].rhs != 0) {
                end_offset += 1; // for the rparen/rbrace
                n = datas[n].rhs;
            } else if (datas[n].lhs != 0) {
                end_offset += 1; // for the rparen/rbrace
                n = datas[n].lhs;
            } else {
                switch (tags[n]) {
                    .array_init_dot_two,
                    .block_two,
                    .struct_init_dot_two,
                    => end_offset += 1, // rbrace
                    .builtin_call_two => end_offset += 2, // lparen/lbrace + rparen/rbrace
                    .container_decl_two => {
                        var i: u32 = 2; // lbrace + rbrace
                        while (token_tags[main_tokens[n] + i] == .container_doc_comment) i += 1;
                        end_offset += i;
                    },
                    .tagged_union_two => {
                        var i: u32 = 5; // (enum) {}
                        while (token_tags[main_tokens[n] + i] == .container_doc_comment) i += 1;
                        end_offset += i;
                    },
                    else => unreachable,
                }
                return main_tokens[n] + end_offset;
            }
        },
        .array_init_dot_two_comma,
        .builtin_call_two_comma,
        .block_two_semicolon,
        .struct_init_dot_two_comma,
        .container_decl_two_trailing,
        .tagged_union_two_trailing,
        => {
            end_offset += 2; // for the comma/semicolon + rbrace/rparen
            if (datas[n].rhs != 0) {
                n = datas[n].rhs;
            } else if (datas[n].lhs != 0) {
                n = datas[n].lhs;
            } else {
                return main_tokens[n] + end_offset; // returns { }
            }
        },
        .simple_var_decl => {
            if (datas[n].rhs != 0) {
                n = datas[n].rhs;
            } else if (datas[n].lhs != 0) {
                n = datas[n].lhs;
            } else {
                end_offset += 1; // from mut token to name
                return main_tokens[n] + end_offset;
            }
        },
        .aligned_var_decl => {
            if (datas[n].rhs != 0) {
                n = datas[n].rhs;
            } else if (datas[n].lhs != 0) {
                end_offset += 1; // for the rparen
                n = datas[n].lhs;
            } else {
                end_offset += 1; // from mut token to name
                return main_tokens[n] + end_offset;
            }
        },
        .global_var_decl => {
            if (datas[n].rhs != 0) {
                n = datas[n].rhs;
            } else {
                const extra = tree.extraData(datas[n].lhs, Node.GlobalVarDecl);
                if (extra.section_node != 0) {
                    end_offset += 1; // for the rparen
                    n = extra.section_node;
                } else if (extra.align_node != 0) {
                    end_offset += 1; // for the rparen
                    n = extra.align_node;
                } else if (extra.type_node != 0) {
                    n = extra.type_node;
                } else {
                    end_offset += 1; // from mut token to name
                    return main_tokens[n] + end_offset;
                }
            }
        },
        .local_var_decl => {
            if (datas[n].rhs != 0) {
                n = datas[n].rhs;
            } else {
                const extra = tree.extraData(datas[n].lhs, Node.LocalVarDecl);
                if (extra.align_node != 0) {
                    end_offset += 1; // for the rparen
                    n = extra.align_node;
                } else if (extra.type_node != 0) {
                    n = extra.type_node;
                } else {
                    end_offset += 1; // from mut token to name
                    return main_tokens[n] + end_offset;
                }
            }
        },
        .container_field_init => {
            if (datas[n].rhs != 0) {
                n = datas[n].rhs;
            } else if (datas[n].lhs != 0) {
                n = datas[n].lhs;
            } else {
                return main_tokens[n] + end_offset;
            }
        },
        .container_field_align => {
            if (datas[n].rhs != 0) {
                end_offset += 1; // for the rparen
                n = datas[n].rhs;
            } else if (datas[n].lhs != 0) {
                n = datas[n].lhs;
            } else {
                return main_tokens[n] + end_offset;
            }
        },
        .container_field => {
            const extra = tree.extraData(datas[n].rhs, Node.ContainerField);
            if (extra.value_expr != 0) {
                n = extra.value_expr;
            } else if (extra.align_expr != 0) {
                end_offset += 1; // for the rparen
                n = extra.align_expr;
            } else if (datas[n].lhs != 0) {
                n = datas[n].lhs;
            } else {
                return main_tokens[n] + end_offset;
            }
        },

        .array_init_one,
        .struct_init_one,
        => {
            end_offset += 1; // rbrace
            if (datas[n].rhs == 0) {
                return main_tokens[n] + end_offset;
            } else {
                n = datas[n].rhs;
            }
        },
        .slice_open,
        .call_one_comma,
        .async_call_one_comma,
        .array_init_one_comma,
        .struct_init_one_comma,
        => {
            end_offset += 2; // ellipsis2 + rbracket, or comma + rparen
            n = datas[n].rhs;
            std.debug.assert(n != 0);
        },
        .slice => {
            const extra = tree.extraData(datas[n].rhs, Node.Slice);
            std.debug.assert(extra.end != 0); // should have used slice_open
            end_offset += 1; // rbracket
            n = extra.end;
        },
        .slice_sentinel => {
            const extra = tree.extraData(datas[n].rhs, Node.SliceSentinel);
            std.debug.assert(extra.sentinel != 0); // should have used slice
            end_offset += 1; // rbracket
            n = extra.sentinel;
        },

        .@"continue" => {
            if (datas[n].lhs != 0) {
                return datas[n].lhs + end_offset;
            } else {
                return main_tokens[n] + end_offset;
            }
        },
        .@"break" => {
            if (datas[n].rhs != 0) {
                n = datas[n].rhs;
            } else if (datas[n].lhs != 0) {
                return datas[n].lhs + end_offset;
            } else {
                return main_tokens[n] + end_offset;
            }
        },
        .fn_decl => {
            if (datas[n].rhs != 0) {
                n = datas[n].rhs;
            } else {
                n = datas[n].lhs;
            }
        },
        .fn_proto_one => {
            const extra = tree.extraData(datas[n].lhs, Node.FnProtoOne);
            // linksection, callconv, align can appear in any order, so we
            // find the last one here.
            var max_node: Node.Index = datas[n].rhs;
            var max_start = token_starts[main_tokens[max_node]];
            var max_offset: TokenIndex = 0;
            if (extra.align_expr != 0) {
                const start = token_starts[main_tokens[extra.align_expr]];
                if (start > max_start) {
                    max_node = extra.align_expr;
                    max_start = start;
                    max_offset = 1; // for the rparen
                }
            }
            if (extra.section_expr != 0) {
                const start = token_starts[main_tokens[extra.section_expr]];
                if (start > max_start) {
                    max_node = extra.section_expr;
                    max_start = start;
                    max_offset = 1; // for the rparen
                }
            }
            if (extra.callconv_expr != 0) {
                const start = token_starts[main_tokens[extra.callconv_expr]];
                if (start > max_start) {
                    max_node = extra.callconv_expr;
                    max_start = start;
                    max_offset = 1; // for the rparen
                }
            }
            n = max_node;
            end_offset += max_offset;
        },
        .fn_proto => {
            const extra = tree.extraData(datas[n].lhs, Node.FnProto);
            // linksection, callconv, align can appear in any order, so we
            // find the last one here.
            var max_node: Node.Index = datas[n].rhs;
            var max_start = token_starts[main_tokens[max_node]];
            var max_offset: TokenIndex = 0;
            if (extra.align_expr != 0) {
                const start = token_starts[main_tokens[extra.align_expr]];
                if (start > max_start) {
                    max_node = extra.align_expr;
                    max_start = start;
                    max_offset = 1; // for the rparen
                }
            }
            if (extra.section_expr != 0) {
                const start = token_starts[main_tokens[extra.section_expr]];
                if (start > max_start) {
                    max_node = extra.section_expr;
                    max_start = start;
                    max_offset = 1; // for the rparen
                }
            }
            if (extra.callconv_expr != 0) {
                const start = token_starts[main_tokens[extra.callconv_expr]];
                if (start > max_start) {
                    max_node = extra.callconv_expr;
                    max_start = start;
                    max_offset = 1; // for the rparen
                }
            }
            n = max_node;
            end_offset += max_offset;
        },
        .while_cont => {
            const extra = tree.extraData(datas[n].rhs, Node.WhileCont);
            std.debug.assert(extra.then_expr != 0);
            n = extra.then_expr;
        },
        .@"while" => {
            const extra = tree.extraData(datas[n].rhs, Node.While);
            std.debug.assert(extra.else_expr != 0);
            n = extra.else_expr;
        },
        .@"if", .@"for" => {
            const extra = tree.extraData(datas[n].rhs, Node.If);
            std.debug.assert(extra.else_expr != 0);
            n = extra.else_expr;
        },
        .@"suspend" => {
            if (datas[n].lhs != 0) {
                n = datas[n].lhs;
            } else {
                return main_tokens[n] + end_offset;
            }
        },
        .array_type_sentinel => {
            const extra = tree.extraData(datas[n].rhs, Node.ArrayTypeSentinel);
            n = extra.elem_type;
        },
    };
}