// latex-lsp/parser.ts
//
// 482 lines
// 11 KiB
// TypeScript
// Raw Normal View History
//
// 2023-12-25 22:44:16 +00:00
import fs from 'fs';
import process from 'process';
import { Severity, type Dialog } from './lsp';
/**
 * Links one span of the extracted plain text to the LaTeX source it came from.
 */
type ParseResultConversion = {
    /** Kind of span: ordinary text, or a heading produced by `\section` ('h1') / `\subsection` ('h2'). */
    type: 'text' | 'h1' | 'h2';
    /** Offset of the span inside the extracted text. */
    position: number;
    /** Number of characters the span covers in the extracted text. */
    length: number;
    /** Offset in the original source that `position` corresponds to. */
    original_position: number;
    /** Length reported for the source construct; only the heading conversions set it. */
    original_length?: number;
};
/**
 * Outcome of parsing a LaTeX document into checkable plain text.
 */
type ParseResult = {
    /** The unmodified input that was parsed. */
    originalString: string;
    /** Plain text extracted from the input. */
    text: string;
    /** Spans of `text` together with the source offsets they map back to, in emission order. */
    conversions: ParseResultConversion[];
};
/**
 * Measures a `%` comment that starts at `curPos`.
 *
 * @param text   Full source text.
 * @param curPos Index of the character that opens the comment.
 * @returns The distance from `curPos` to the first newline after it, so that
 *          adding the result to `curPos` lands on that newline. When no
 *          newline follows, the distance to the end of the text is returned.
 */
function parseComment(text: string, curPos: number): number {
    const newlineAt = text.indexOf('\n', curPos + 1);
    const commentEnd = newlineAt === -1 ? text.length : newlineAt;
    return commentEnd - curPos;
}
/**
 * Turns the source slice `[startPos, curPos)` into plain text plus the
 * conversions that map it back to the source.
 *
 * A slice without a newline is emitted verbatim as one conversion. A slice
 * with newlines is emitted line by line: leading whitespace of every line is
 * stripped, a '\n' is appended after every line, and each line gets its own
 * conversion.
 *
 * @param text     Full source text.
 * @param startPos Inclusive start of the slice.
 * @param curPos   Exclusive end of the slice.
 * @param result   Text extracted so far; only its length is used, as the
 *                 position of the first emitted conversion.
 * @returns `[conversions, textToAppend]`, or `null` when the slice is empty
 *          or contains only whitespace.
 */
function createPartition(text: string, startPos: number, curPos: number, result: string): [ParseResultConversion[], string] | null {
    if (startPos >= curPos) {
        return null;
    }
    const chunk = text.substring(startPos, curPos);
    if (/^\s*$/.test(chunk)) {
        return null;
    }

    if (!chunk.includes('\n')) {
        const whole: ParseResultConversion = {
            length: curPos - startPos,
            position: result.length,
            original_position: startPos,
            type: 'text',
        };
        return [[whole], chunk];
    }

    const conversions: ParseResultConversion[] = [];
    const emitted: string[] = [];
    let sourceOffset = startPos;
    let outputOffset = result.length;
    chunk.split('\n').forEach((rawLine) => {
        const trimmed = rawLine.replace(/^\s*/, '');
        // Skip over the indentation that was stripped from this line.
        sourceOffset += rawLine.length - trimmed.length;
        conversions.push({
            length: trimmed.length,
            original_position: sourceOffset,
            position: outputOffset,
            type: 'text',
        });
        emitted.push(trimmed + '\n');
        // The +1 accounts for the newline on both the output and source side.
        outputOffset += trimmed.length + 1;
        sourceOffset += trimmed.length + 1;
    });
    return [conversions, emitted.join('')];
}
/**
 * Tells whether a character code may be part of a command name.
 *
 * Accepts codes 65-90 ('A'-'Z') and codes 92-122.
 * NOTE(review): the second range starts at 92 ('\') rather than 97 ('a'), so
 * '\', ']', '^', '_' and '`' are accepted as well - confirm whether that is
 * intended before tightening it, since parseCommand relies on this function.
 *
 * @param charCode UTF-16 code unit to classify.
 */
function isChar(charCode: number): boolean {
    const inUpperCaseRange = charCode >= 65 && charCode <= 90;
    const inLowerRange = charCode >= 92 && charCode <= 122;
    return inLowerRange || inUpperCaseRange;
}
/**
 * Scans for the delimiter that closes an already opened group.
 *
 * The scan starts at `curPos` with one group open; every `startChar` opens a
 * nested group and every `endChar` closes one.
 *
 * @param endChar   Closing delimiter.
 * @param startChar Opening delimiter.
 * @param text      Text to scan.
 * @param curPos    Index of the first character inside the group.
 * @returns Number of characters from `curPos` up to and including the
 *          closing delimiter.
 * @throws Error when the text ends before the group is closed.
 */
function readBalanced(endChar: string, startChar: string, text: string, curPos: number): number {
    let depth = 1;
    let cursor = curPos;
    while (cursor < text.length) {
        const current = text[cursor];
        if (current === endChar) {
            depth -= 1;
            if (depth === 0) {
                return cursor - curPos + 1;
            }
        } else if (current === startChar) {
            depth += 1;
        }
        cursor += 1;
    }
    throw new Error("Can not find end of balance read");
}
/**
 * Tells whether `char` is a whitespace character that ends a command.
 *
 * Recognises space, tab, line feed and carriage return. The carriage return
 * is included so that files with CRLF line endings are handled the same way
 * as files with LF line endings.
 *
 * @param char A single character.
 */
function isWhiteSpace(char: string): boolean {
    return char === ' ' || char === '\t' || char === '\n' || char === '\r';
}
/**
 * Parses the LaTeX command whose backslash sits at `curPos`.
 *
 * `\\` and `\%` are emitted as the literal characters `\` and `%`. Any other
 * command is read as a name followed by any number of `[...]` options and
 * `{...}` arguments. `\section` and `\subsection` emit their first argument
 * as a heading line; the other known commands emit nothing.
 *
 * The command ends at the first whitespace character (which is consumed), or
 * just before a backslash, or - once an option/argument has been read - just
 * before a letter, '.' or ','. Those non-whitespace terminators are left in
 * the input so the caller still sees them. A command that runs up to the end
 * of the text ends there.
 *
 * @param text   Full source text.
 * @param curPos Index of the backslash that starts the command.
 * @param result Text extracted so far; only its length is used, as the
 *               position of an emitted conversion.
 * @returns `[len]` when nothing is emitted, otherwise
 *          `[len, conversion, textToAppend]`. `len` is the distance from
 *          `curPos` to the last character consumed by the command.
 * @throws Error when the text ends right after the backslash, when an
 *         option/argument is never closed, when an unsupported character or
 *         command is met, or when a heading command has no argument.
 */
function parseCommand(text: string, curPos: number, result: string): [number, ParseResultConversion, string] | [number] {
    if (text.length - 1 === curPos) {
        throw new Error("The latex file has the wrong format the file can not end with a empty command");
    }

    // Escaped backslash / percent sign: emit the character itself.
    const escaped = text[curPos + 1];
    if (escaped === '\\' || escaped === '%') {
        return [1, {
            length: 1,
            position: result.length,
            original_position: curPos + 1,
            type: 'text'
        }, escaped];
    }

    let commandName = "";
    let commandNameFinished = false;
    // TODO store the location of the opts and args
    const options: string[] = [];
    const args: string[] = [];
    // Source offset of the first character inside the first `{...}` argument.
    let firstArgStart = -1;
    // Index of the last character that belongs to the command. Defaults to the
    // end of the text so a command that is the last thing in the file is valid.
    let lastConsumed = text.length - 1;

    for (let i = curPos + 1; i < text.length; i++) {
        const char = text[i];
        if (char === '\\') {
            // Start of the next command: stop without consuming the backslash.
            lastConsumed = i - 1;
            break;
        } else if (isChar(char.charCodeAt(0))) {
            if (commandNameFinished) {
                // Ordinary text right after the arguments; leave it for the caller.
                lastConsumed = i - 1;
                break;
            }
            commandName += char;
        } else if (char === '[') {
            commandNameFinished = true;
            const span = readBalanced(']', '[', text, i + 1);
            options.push(text.substring(i + 1, i + span));
            i += span;
        } else if (char === '{') {
            commandNameFinished = true;
            const span = readBalanced('}', '{', text, i + 1);
            if (args.length === 0) {
                firstArgStart = i + 1;
            }
            args.push(text.substring(i + 1, i + span));
            i += span;
        } else if (isWhiteSpace(char)) {
            // The whitespace that ends a command is consumed with it.
            lastConsumed = i;
            break;
        } else if ((char === '.' || char === ',') && commandNameFinished) {
            // Keep the punctuation in the text that follows the command.
            lastConsumed = i - 1;
            break;
        } else {
            console.log(text.substring(i - 20, i + 20));
            console.log('Char:', char.charCodeAt(0));
            throw new Error("TODO handle not char chars in the parse command function");
        }
    }

    const len = lastConsumed - curPos;

    switch (commandName) {
        case 'documentclass':
        case 'usepackage':
        case 'graphicspath':
        case 'hypersetup':
        case 'pagestyle':
        case 'fancyhead':
        case 'fancyfoot':
        case 'renewcommand':
        case 'setlength':
        case 'addbibresource':
        case 'date':
        case 'maketitle':
        case 'newpage':
        case 'tableofcontents':
        case 'includegraphics':
        case 'appendix':
        case 'printbibliography':
            return [len];
        case 'title':
        case 'author':
        case 'end':
            console.log("TODO: add way to check the", commandName)
            return [len];
        case 'cite':
            console.log("TODO check if it exists on the bibliography");
            return [len];
        case 'begin':
        case 'item':
            console.log("TODO handle", commandName, ":", args)
            return [len];
        case 'section':
        case 'subsection': {
            const title = args[0];
            if (title === undefined) {
                throw new Error(`The ${commandName} command is missing its title argument`);
            }
            const headingType: 'h1' | 'h2' = commandName === 'section' ? 'h1' : 'h2';
            return [len, {
                // +1 for the newline appended after the title.
                length: title.length + 1,
                original_length: len,
                // Point at the title itself so offsets inside the heading map
                // onto the matching source characters.
                original_position: firstArgStart,
                position: result.length,
                type: headingType,
            }, title + '\n'];
        }
        default:
            console.log("Command name:", commandName, "options:", options, "args:", args);
            throw new Error("TODO handle this case");
    }
}
/**
 * Finds the next occurrence of `char` at or after `curPos`.
 *
 * @param text   Text to scan.
 * @param char   Single character to look for.
 * @param curPos Index at which the scan starts.
 * @returns Distance from `curPos` to the character that was found.
 * @throws Error when the character does not occur in the rest of the text.
 */
function readUntil(text: string, char: string, curPos: number): number {
    let cursor = curPos;
    while (cursor < text.length) {
        if (text[cursor] === char) {
            return cursor - curPos;
        }
        cursor += 1;
    }
    throw new Error("Could not find matching pair");
}
/**
 * Extracts the checkable plain text from a LaTeX document.
 *
 * The source is scanned once. `%` comments, `\` commands and `$...$` inline
 * math are removed (commands may contribute text of their own, see
 * parseCommand); the text between them is copied to the result. Every piece
 * of emitted text is recorded as a conversion so offsets in the result can be
 * mapped back to the source. Text that follows the last special construct is
 * emitted as well, without its trailing whitespace.
 *
 * @param text LaTeX source.
 * @returns The extracted text, the original source and the conversions.
 * @throws Error on `$$` display math, on an unterminated `$...$`, and for
 *         everything parseCommand rejects.
 */
export function parseLsp(text: string): ParseResult {
    const result: ParseResult = {
        text: '',
        originalString: text,
        conversions: [],
    };
    let conversionStartPosition = 0;

    // Emits the plain text waiting in [conversionStartPosition, end).
    const flushPending = (end: number): void => {
        const partition = createPartition(text, conversionStartPosition, end, result.text);
        if (partition) {
            const [convs, toAdd] = partition;
            for (const conv of convs) {
                result.conversions.push(conv);
            }
            result.text += toAdd;
        }
    };

    for (let i = 0; i < text.length; i++) {
        const char = text[i];
        // NOTE(review): the flushes below pass `i - 1` as the exclusive end, so
        // the character right before the special character (usually a space or
        // a newline) is left out of the emitted text - confirm this is intended.
        if (char === '%') {
            flushPending(i - 1);
            // Lands on the newline that ends the comment; the loop increment skips it.
            i += parseComment(text, i);
            conversionStartPosition = i + 1;
        } else if (char === '\\') {
            flushPending(i - 1);
            const res = parseCommand(text, i, result.text);
            if (res.length === 3) {
                const [len, conv, toAdd] = res;
                result.conversions.push(conv);
                result.text += toAdd;
                i += len;
            } else {
                i += res[0];
            }
            conversionStartPosition = i + 1;
        } else if (char === '$') {
            console.log('Found math expr')
            if (text[i + 1] === '$') {
                throw new Error("Handle double math expression");
            }
            flushPending(i - 1);
            // Jump onto the closing '$'; the loop increment skips it.
            i += readUntil(text, '$', i + 1) + 1;
            conversionStartPosition = i + 1;
        }
    }

    // Emit whatever follows the last special construct; without this the final
    // paragraph (or a whole file without commands) would be lost.
    let tailEnd = text.length;
    while (tailEnd > conversionStartPosition && /\s/.test(text[tailEnd - 1])) {
        tailEnd -= 1;
    }
    flushPending(tailEnd);

    return result;
}
/**
 * Converts an absolute offset into a zero-based `[line, character]` pair.
 *
 * @param lineIndex Ascending list of line start offsets; entry `i` is the
 *                  offset at which line `i` starts.
 * @param offset    Absolute offset to convert.
 * @returns `[line, character]`, where `line` is the last entry whose start is
 *          not after `offset` and `character` is the distance from that
 *          start. `[-1, -1]` when `lineIndex` is empty or `offset` lies
 *          before its first entry.
 */
function getLineAndChar(lineIndex: number[], offset: number): [number, number] {
    let low = 0;
    let high = lineIndex.length - 1;
    let line = -1;
    // Binary search for the last line start that is <= offset. An offset that
    // is exactly a line start therefore belongs to that line, not the previous one.
    while (low <= high) {
        const mid = Math.floor((low + high) / 2);
        const lineStart = lineIndex[mid];
        if (lineStart !== undefined && lineStart <= offset) {
            line = mid;
            low = mid + 1;
        } else {
            high = mid - 1;
        }
    }
    if (line === -1) {
        return [-1, -1];
    }
    return [line, offset - lineIndex[line]];
}
/**
 * Maps an offset in the extracted text back to an offset in the source.
 *
 * Relies on `res.conversions` being ordered by ascending `position`, which is
 * how parseLsp appends them.
 *
 * @param res    Parse result whose conversions are searched.
 * @param offset Offset into `res.text`.
 * @returns The matching source offset, or -1 when no conversion covers
 *          `offset`. The position just past a conversion's last character
 *          counts as covered; when that position is also the start of the
 *          next conversion, the next conversion wins.
 */
function getOriginalPostion(res: ParseResult, offset: number): number {
    const conversions = res.conversions;
    let low = 0;
    let high = conversions.length - 1;
    let found = -1;
    // Binary search for the last conversion that starts at or before offset.
    while (low <= high) {
        const mid = Math.floor((low + high) / 2);
        const probe = conversions[mid];
        if (probe !== undefined && probe.position <= offset) {
            found = mid;
            low = mid + 1;
        } else {
            high = mid - 1;
        }
    }
    if (found === -1) {
        return -1;
    }
    const conv = conversions[found];
    if (offset > conv.position + conv.length) {
        return -1;
    }
    return conv.original_position + (offset - conv.position);
}
/**
 * Builds the table of line start offsets for `file`.
 *
 * Lines are separated by '\n'. The table starts with 0 and gains one entry
 * per line: the offset just past that line and its newline. The final entry
 * therefore lies one past the end of the text.
 *
 * @param file Text to index.
 * @returns Ascending offsets, one more than the number of lines.
 */
function buildLineIndex(file: string) {
    const lineIndex = [0];
    let nextStart = 0;
    file.split('\n').forEach((line) => {
        nextStart += line.length + 1;
        lineIndex.push(nextStart);
    });
    return lineIndex;
}
// 2023-12-26 20:01:14 +00:00
/**
 * One entry of the `matches` array returned by the grammar-check request.
 *
 * Only `offset`, `length`, `message` and `replacements` are read in this
 * file; `offset` and `length` address the text that was submitted.
 */
type Match = {
    /** Text shown to the user as the diagnostic message. */
    message: string;
    shortMessage: string;
    /** Start of the flagged span in the submitted text. */
    offset: number;
    /** Length of the flagged span in the submitted text. */
    length: number;
    /** Suggested replacement strings, each wrapped in an object. */
    replacements: Array<{ value: string }>;
    context: {
        text: string;
        offset: number;
        length: number;
    };
    sentence: string;
    rule: {
        id: string;
        subId: string;
        description: string;
        urls: Array<{ value: string }>;
        issueType: string;
        category: {
            id: string;
            name: string;
        };
    };
};
// 2023-12-25 22:44:16 +00:00
// 2023-12-26 20:01:14 +00:00
/**
 * Sends the extracted text to the LanguageTool check endpoint.
 *
 * The request is a form-encoded POST carrying the text, the language
 * `en-GB`, the `picky` level, and the credentials read from the `USERNAME`
 * and `APIKEY` environment variables (empty strings when unset).
 *
 * @param res Parse result whose `text` is checked.
 * @returns The `matches` array of the JSON response.
 * @throws Error when the response status is not 200 or the response body has
 *         no `matches` array. Failures are reported to the caller instead of
 *         terminating the process, so the host can decide how to react.
 */
export async function diagnosticsRequests(res: ParseResult): Promise<Match[]> {
    const formData = new URLSearchParams();
    formData.set('text', res.text);
    formData.set('language', 'en-GB');
    formData.set('username', process.env.USERNAME ?? '');
    formData.set('apiKey', process.env.APIKEY ?? '');
    formData.set('level', 'picky');

    const rawRes = await fetch('https://api.languagetoolplus.com/v2/check', {
        method: 'POST',
        headers: {
            'Accept': 'application/json',
        },
        body: formData,
    });
    if (rawRes.status !== 200) {
        throw new Error(`LanguageTool check request failed with HTTP status ${rawRes.status}`);
    }

    const body = (await rawRes.json()) as { matches?: unknown } | null;
    const matches = body?.matches;
    if (!Array.isArray(matches)) {
        throw new Error("LanguageTool check response does not contain a matches array");
    }
    return matches;
}
/**
 * Produces the diagnostics for a LaTeX document.
 *
 * The document is reduced to plain text, the text is checked remotely, and
 * every reported match is translated back into a line/character range of the
 * original document.
 *
 * @param file LaTeX source of the document.
 * @returns One diagnostic per match that could be mapped back to the source,
 *          each carrying the suggested replacement strings. Matches whose
 *          start cannot be mapped are logged and skipped.
 */
export async function getDiagnostics(file: string): Promise<(Dialog & {replacements: string[]})[]> {
    const res = parseLsp(file);
    const matches = await diagnosticsRequests(res);
    const lineIndex = buildLineIndex(file);
    const diags: (Dialog & { replacements: string[] })[] = [];

    for (const match of matches) {
        const original_position = getOriginalPostion(res, match.offset);
        if (original_position == -1) {
            console.log("Could not find the original position")
            continue;
        }

        // Map the last flagged character separately: the source may contain
        // indentation or commands that are absent from the checked text, so
        // the flagged span can be longer in the source than `match.length`.
        let originalEnd = original_position + match.length;
        if (match.length > 0) {
            const lastChar = getOriginalPostion(res, match.offset + match.length - 1);
            if (lastChar !== -1 && lastChar + 1 >= original_position) {
                originalEnd = lastChar + 1;
            }
        }

        const [startLine, startChar] = getLineAndChar(lineIndex, original_position);
        const [endLine, endChar] = getLineAndChar(lineIndex, originalEnd);
        const range = {
            start: { line: startLine, character: startChar },
            end: { line: endLine, character: endChar },
        };
        diags.push({
            range,
            severity: Severity.Error,
            message: match.message,
            replacements: match.replacements.map(a => a.value),
        });
    }
    return diags;
}