// latex-lsp/parser.ts

import * as fs from 'fs';
import * as os from 'os';
import * as path from 'path';
import process from 'process';
import { OperationCanceledException } from 'typescript';
import { Severity, type Dialog } from './lsp';
/**
 * Links one span of the plain text produced by the parser back to the LaTeX
 * source it was generated from.
 */
type ParseResultConversion = {
    /** Extent of the span in the generated text. */
    length: number,
    /** Number of source characters the span stands for; only set for spans generated from a command. */
    original_length?: number,
    /** Offset of the span in the original LaTeX source. */
    original_position: number,
    /** Offset of the span in the generated text. */
    position: number,
    /** `h1` is used for section titles, `h2` for subsection titles, `text` for everything else. */
    type: 'text' | 'h1' | 'h2'
};
/** Outcome of converting a LaTeX document into plain text. */
type ParseResult = {
    /** The generated plain text. */
    text: string,
    /** The LaTeX source the text was generated from. */
    originalString: string,
    /** Spans mapping `text` back to `originalString`, in the order they were emitted. */
    conversions: ParseResultConversion[],
}
/**
 * Measures how far to advance to get past a `%` comment.
 *
 * @param text   The LaTeX source.
 * @param curPos Index of the `%` that opens the comment.
 * @returns The distance from `curPos` to the newline that ends the comment, so
 *          that `curPos + result` is the index of that newline (the caller's
 *          loop increment then steps over it). When the comment runs to the
 *          end of the input, the distance to the end of the input.
 */
function parseComment(text: string, curPos: number): number {
    const newlinePos = text.indexOf('\n', curPos + 1);
    const commentEnd = newlinePos === -1 ? text.length : newlinePos;
    return commentEnd - curPos;
}
/**
 * Turns the plain-text run `text[startPos..curPos - ignoreLast]` (both ends
 * inclusive) into output text plus the conversions that map it back to the
 * source.
 *
 * Indentation is removed from every line that begins at the start of a source
 * line. A first segment that begins mid-line keeps its leading whitespace,
 * because that whitespace separates it from whatever was emitted just before.
 * Lines are joined with `\n`; no newline is added after the last one, so the
 * output contains exactly the line breaks of the source.
 *
 * @param text       The LaTeX source.
 * @param startPos   Index of the first character of the run.
 * @param curPos     Index of the last character of the run.
 * @param result     The text generated so far; its length is the output
 *                   position of the new run.
 * @param ignoreLast Number of characters to drop from the end of the run.
 * @returns `[conversions, textToAppend]`, or `null` when the run is empty or
 *          contains only whitespace.
 */
function createPartition(text: string, startPos: number, curPos: number, result: string, ignoreLast: number = 0): [ParseResultConversion[], string] | null {
    const lastPos = curPos - ignoreLast;
    // `lastPos` is inclusive, so startPos === lastPos is a one character run.
    if (startPos > lastPos) {
        return null;
    }
    const chunk = text.substring(startPos, lastPos + 1);
    if (/^\s*$/.test(chunk)) {
        return null;
    }

    if (!chunk.includes('\n')) {
        return [[{
            // NOTE(review): this is one less than chunk.length. The lookup in
            // getOriginalPostion treats the end of a span as inclusive, so the
            // last character is still covered; left unchanged on purpose.
            length: lastPos - startPos,
            position: result.length,
            original_position: startPos,
            type: 'text',
        }], chunk];
    }

    const startsAtLineStart = startPos === 0 || text[startPos - 1] === '\n';
    const lines = chunk.split('\n');
    const conversions: ParseResultConversion[] = [];
    let emitted = "";
    let originalPos = startPos;
    let outputPos = result.length;

    lines.forEach((line, index) => {
        const keepLeadingSpace = index === 0 && !startsAtLineStart;
        const content = keepLeadingSpace ? line : line.replace(/^\s*/, '');
        // Skip over the indentation that was removed.
        originalPos += line.length - content.length;

        const isLastLine = index === lines.length - 1;
        emitted += isLastLine ? content : content + '\n';
        conversions.push({
            length: content.length,
            original_position: originalPos,
            position: outputPos,
            type: 'text'
        });

        // + 1 accounts for the newline, both in the source and in the output.
        outputPos += content.length + 1;
        originalPos += content.length + 1;
    });

    return [conversions, emitted];
}
/**
 * Tells whether a character code may be part of a command name.
 *
 * Accepted codes are 65-90 (`A`-`Z`), 92-122 and 42 (`*`).
 *
 * NOTE(review): the range 92-122 covers `a`-`z` but also `\`, `]`, `^`, `_`
 * and the backtick. It looks like 97 was intended, but parseCommand currently
 * depends on `\` being accepted here to end a command that is directly
 * followed by another one, so the range is left as it is.
 *
 * @param charCode UTF-16 code unit to test.
 */
function isChar(charCode: number): boolean {
    const isUpperCase = charCode >= 65 && charCode <= 90;
    const isInLowerRange = charCode >= 92 && charCode <= 122;
    const isStar = charCode === 42;
    return isInLowerRange || isUpperCase || isStar;
}
/**
 * Finds the delimiter that closes a group which has already been opened.
 *
 * @param endChar   Closing delimiter.
 * @param startChar Opening delimiter; every occurrence nests one level deeper.
 * @param text      The LaTeX source.
 * @param curPos    Index of the first character after the opening delimiter.
 * @returns Number of characters from `curPos` up to and including the
 *          matching closing delimiter.
 * @throws Error when the input ends before the group is closed.
 */
function readBalanced(endChar: string, startChar: string, text: string, curPos: number): number {
    // The caller has already consumed one opening delimiter.
    let depth = 1;
    for (let i = curPos; i < text.length; i++) {
        const char = text[i];
        if (char === endChar) {
            if (depth === 1) {
                return i - curPos + 1;
            }
            depth -= 1;
        } else if (char === startChar) {
            depth += 1;
        }
    }
    throw new Error("Can not find end of balance read")
}
/**
 * Tells whether `char` is a space, a tab, a line feed or a carriage return.
 *
 * The carriage return is included so that sources with CRLF line endings are
 * handled the same way as sources with LF line endings.
 */
function isWhiteSpace(char: string): boolean {
    return char === ' ' || char === '\t' || char === '\n' || char === '\r';
}
/**
 * Parses the command that starts with the backslash at `curPos`.
 *
 * The command consists of its name followed by any number of `[...]` options
 * and `{...}` arguments. It ends at the first whitespace character, at a name
 * character that follows the options/arguments, at a `.` or `,` that follows
 * the options/arguments, or at the end of the input.
 *
 * @param text   The LaTeX source.
 * @param curPos Index of the backslash.
 * @param result The text generated so far; its length is used as the output
 *               position of anything this command emits.
 * @returns `[len]` for commands that produce no text, or
 *          `[len, conversion, textToAppend]` for commands that do. `len` is
 *          the number of source characters that belong to the command,
 *          counted from the backslash.
 * @throws Error when the backslash is the last character of the input, when
 *         an unsupported character or command is met, when a `verbatim`
 *         environment is never closed, or when a sectioning command has no
 *         title argument.
 */
function parseCommand(text: string, curPos: number, result: string): [number, ParseResultConversion, string] | [number] {
    if (text.length - 1 == curPos) {
        throw new Error("The latex file has the wrong format the file can not end with a empty command");
    }

    // Escaped characters: the backslash is dropped and the character is kept.
    const escaped = text[curPos + 1];
    if (escaped === '\\' || escaped === '%' || escaped === '_') {
        return [2, {
            length: 1,
            position: result.length,
            original_position: curPos + 1,
            type: 'text'
        }, escaped];
    }

    let commandName = "";
    let commandNameFinished = false;
    // TODO store the location of the opts and args
    const options: string[] = [];
    const args: string[] = [];
    // Index of the first character that is not part of the command. Defaults
    // to the end of the input so that a command which is the very last thing
    // in the file (no trailing newline) is still accepted.
    let endPos = text.length;
    for (let i = curPos + 1; i < text.length; i++) {
        const char = text[i];
        if (isChar(char.charCodeAt(0))) {
            if (commandNameFinished) {
                endPos = i;
                break;
            }
            commandName += char;
        } else if (char === '[') {
            commandNameFinished = true;
            const groupLen = readBalanced(']', '[', text, i + 1);
            options.push(text.substring(i + 1, i + groupLen))
            // Lands on the closing bracket, the loop increment steps over it.
            i += groupLen;
        } else if (char === '{') {
            commandNameFinished = true;
            const groupLen = readBalanced('}', '{', text, i + 1);
            args.push(text.substring(i + 1, i + groupLen))
            i += groupLen;
        } else if (isWhiteSpace(char)) {
            endPos = i;
            break;
        } else if ((char == '.' || char == ',') && commandNameFinished) {
            endPos = i;
            break;
        } else {
            console.log(text.substring(i - 20, i + 20));
            console.log('Char:' + char.charCodeAt(0));
            throw new Error("TODO handle not char chars in the parse command function");
        }
    }

    //console.log("Parsed '" + text.substring(curPos, endPos) + "'")
    //console.log("Ranged '" + text.substring(curPos - 5 , endPos + 5) + "'")
    let len = endPos - curPos;
    switch (commandName) {
        case 'documentclass':
        case 'usepackage':
        case 'graphicspath':
        case 'hypersetup':
        case 'pagestyle':
        case 'fancyhead':
        case 'fancyfoot':
        case 'renewcommand':
        case 'setlength':
        case 'addbibresource':
        case 'date':
        case 'maketitle':
        case 'newpage':
        case 'tableofcontents':
        case 'includegraphics':
        case 'appendix':
        case 'printbibliography':
        case 'vspace*':
        case 'pagebreak':
        case 'today':
        case 'label':
            return [len];
        case 'title':
        case 'author':
        case 'end':
        case 'ref':
        case 'caption':
        case 'footnote':
            console.log("TODO: add way to check the " + commandName)
            return [len];
        case 'cite': {
            console.log("TODO check if it exists on the bibliography");
            console.log(`Find cite for '${args[0]}'`);
            const toAdd = "[0]";
            return [len, {
                length: toAdd.length,
                original_length: len,
                original_position: curPos,
                position: result.length,
                type: 'text'
            }, toAdd];
        }
        case 'begin':
            if (args[0] === 'verbatim') {
                // Everything up to and including the end marker is skipped.
                const endMarker = '\\end{verbatim}';
                const markerPos = text.indexOf(endMarker, curPos);
                if (markerPos === -1) {
                    throw new Error("Could not find the end of the verbatim environment");
                }
                len = markerPos + endMarker.length - curPos;
            } else {
                console.log("Do not know how to handle " + args[0])
            }
            return [len];
        case 'item': {
            const label = args[0];
            if (label) {
                return [len, {
                    length: 2 + label.length + 1,
                    original_length: len,
                    original_position: curPos + commandName.length,
                    position: result.length,
                    type: 'text'
                }, "— " + label + '\n'];
            }
            return [len, {
                length: 2,
                original_length: len,
                original_position: curPos,
                position: result.length,
                type: 'text'
            }, "— "];
        }
        case 'section*':
        case 'section':
        case 'subsection*':
        case 'subsection': {
            const title = args[0];
            if (title === undefined) {
                throw new Error("The " + commandName + " command is missing its title argument");
            }
            return [len, {
                length: title.length + 1,
                original_length: len,
                // Skips the backslash, the name and the opening brace.
                original_position: curPos + 2 + commandName.length,
                position: result.length,
                type: commandName.startsWith('sub') ? 'h2' : 'h1',
            }, title + '\n'];
        }
        default:
            console.log("Command name: " + commandName + " options: " + options + " args: " + args);
            throw new Error("TODO handle this case " + commandName);
    }
}
/**
 * Measures the distance to the next occurrence of a character.
 *
 * @param text   The text to scan.
 * @param char   The single character to look for.
 * @param curPos Index at which the scan starts (inclusive).
 * @returns Number of characters between `curPos` and the first occurrence of
 *          `char`; 0 when `text[curPos]` is already `char`.
 * @throws Error when `char` does not occur at or after `curPos`.
 */
function readUntil(text: string, char: string, curPos: number): number {
    let i = curPos;
    while (i < text.length) {
        if (text[i] === char) {
            return i - curPos;
        }
        i += 1;
    }
    throw new Error("Could not find matching pair");
}
/**
 * Converts a LaTeX document into plain text that can be sent to a grammar
 * checker, and records how every piece of that text maps back to the source.
 *
 * - `%` comments are dropped.
 * - Commands are handled by parseCommand.
 * - Inline math `$...$` is replaced by the placeholder `mathexpr`.
 * - Doubled backticks and doubled apostrophes become curly double quotes.
 * - Everything else is copied through createPartition.
 *
 * @param text The LaTeX source.
 * @returns The generated text together with its conversions.
 * @throws Error on `$$`, on unterminated math, and whatever parseCommand
 *         throws.
 */
export function parseLsp(text: string): ParseResult {
    const result: ParseResult = {
        text: '',
        originalString: text,
        conversions: [],
    };

    // Appends the outcome of createPartition, if there is one.
    const appendPartition = (partition: [ParseResultConversion[], string] | null): void => {
        if (!partition) {
            return;
        }
        const [conv, toAdd] = partition;
        result.conversions = result.conversions.concat(conv);
        result.text += toAdd;
    };

    // Index of the first source character that has not been emitted yet.
    let conversionStartPosition = 0;

    for (let i = 0; i < text.length; i++) {
        const char = text[i];

        if (char === '%') {
            //console.log("Found comment");
            appendPartition(createPartition(text, conversionStartPosition, i - 1, result.text));

            // Lands on the newline that ends the comment; it is skipped.
            i += parseComment(text, i);
            conversionStartPosition = i + 1;
        } else if (char === '\\') {
            //console.log("Found command")
            const possiblePartition = createPartition(text, conversionStartPosition, i - 1, result.text);
            console.log(possiblePartition)
            appendPartition(possiblePartition);

            const res = parseCommand(text, i, result.text);
            if (res.length === 3) {
                result.conversions.push(res[1]);
                result.text += res[2];
            }

            // First source index that does not belong to the command.
            const commandEnd = i + res[0];
            // Only whitespace that directly follows a command name is
            // consumed together with the command. Anything else (punctuation,
            // the space after an argument, the next backslash, ...) is
            // ordinary input and must be processed by the next iteration.
            const endsWithName = /[A-Za-z*]/.test(text[commandEnd - 1] ?? '');
            const followedBySpace = /[ \t\n\r]/.test(text[commandEnd] ?? '');
            i = endsWithName && followedBySpace ? commandEnd : commandEnd - 1;
            conversionStartPosition = i + 1;
        } else if (char === '$') {
            console.log('Found math expr')
            if (text[i + 1] === '$') {
                throw new Error("Handle double math expression");
            }
            appendPartition(createPartition(text, conversionStartPosition, i - 1, result.text));

            const closingPos = i + 1 + readUntil(text, '$', i + 1);
            // A space right after the closing `$` is emitted with the
            // placeholder and consumed here, so that the placeholder never
            // ends up glued to the following word nor followed by two spaces.
            const spaceFollows = text[closingPos + 1] === ' ';
            const to_add = 'mathexpr' + (spaceFollows ? ' ' : '');
            result.conversions.push({
                length: to_add.length,
                position: result.text.length,
                original_position: i,
                type: 'text',
            });
            result.text += to_add;
            i = spaceFollows ? closingPos + 1 : closingPos;
            conversionStartPosition = i + 1;
        } else if (char == '`' || char == "'") {
            console.log('Found coutes')
            // Only doubled quote characters are LaTeX quotation marks.
            if (text[i + 1] !== char) {
                continue;
            }
            appendPartition(createPartition(text, conversionStartPosition, i - 1, result.text));

            const to_add = char == '`' ? "“" : "”";
            result.conversions.push({
                length: to_add.length,
                position: result.text.length,
                original_position: i,
                type: 'text',
            });
            result.text += to_add;
            // Step over the second quote character.
            i += 1;
            conversionStartPosition = i + 1;
        }
    }

    // Emit whatever plain text follows the last special character.
    appendPartition(createPartition(text, conversionStartPosition, text.length - 1, result.text));

    result.text = result.text.replace(/``/g, "''");

    return result;
}
/**
 * Converts an offset into a zero-based `[line, character]` pair.
 *
 * @param lineIndex Ascending start offsets of the lines, as produced by
 *                  buildLineIndex (the last entry is the offset just past the
 *                  final line).
 * @param offset    Offset to convert.
 * @returns `[line, character]`, or `[-1, -1]` when `offset` lies before the
 *          first entry of `lineIndex`.
 */
function getLineAndChar(lineIndex: number[], offset: number): [number, number] {
    let low = 0;
    let high = lineIndex.length - 1;

    while (low <= high) {
        const mid = Math.floor((low + high) / 2);
        const nextLineStart = lineIndex[mid + 1];

        // `<=` because an offset equal to the start of the next line belongs
        // to that next line, not to the end of this one.
        if (nextLineStart !== undefined && nextLineStart <= offset) {
            low = mid + 1;
            continue;
        }
        if (lineIndex[mid] > offset) {
            high = mid - 1;
            continue;
        }
        return [mid, offset - lineIndex[mid]];
    }
    return [-1, -1];
}
/**
 * Maps an offset in the generated text back to an offset in the LaTeX source.
 *
 * The conversions are expected to be ordered by `position`. A span covers the
 * offsets `position` to `position + length`, both inclusive. When an offset is
 * covered by two neighbouring spans, the one that starts later is used.
 *
 * @param res    Result of the parse.
 * @param offset Offset in `res.text`.
 * @returns The matching offset in the source, or -1 when no span covers
 *          `offset`.
 */
function getOriginalPostion(res: ParseResult, offset: number): number {
    const conversions = res.conversions;

    // Binary search for the first span that starts after `offset`.
    let low = 0;
    let high = conversions.length;
    while (low < high) {
        const mid = Math.floor((low + high) / 2);
        if (conversions[mid].position <= offset) {
            low = mid + 1;
        } else {
            high = mid;
        }
    }

    // The candidate is the span right before it.
    const conv = conversions[low - 1];
    if (conv === undefined || conv.position + conv.length < offset) {
        return -1;
    }
    return conv.original_position + (offset - conv.position);
}
/**
 * Builds the table of line start offsets of a file.
 *
 * @param file The file content; lines are separated by `\n`.
 * @returns An array whose entry `i` is the offset at which line `i` starts,
 *          followed by one final entry holding the offset just past the last
 *          line (counting a newline after it).
 */
function buildLineIndex(file: string): number[] {
    const lineIndex = [0];
    let offset = 0;
    for (const line of file.split('\n')) {
        // + 1 for the newline that ends the line.
        offset += line.length + 1;
        lineIndex.push(offset);
    }
    return lineIndex;
}
/**
 * One entry of the `matches` array in the response of the grammar check
 * request made by diagnosticsRequests.
 */
type Match = {
    /** Description of the problem; used as the diagnostic message. */
    message: string,
    shortMessage: string,
    /** Offset of the problem in the text that was submitted. */
    offset: number,
    /** Length of the problem in the text that was submitted. */
    length: number,
    /** Suggested corrections. */
    replacements: {
        value: string
    }[],
    context: {
        text: string,
        offset: number,
        length: number
    },
    sentence: string,
    /** The rule that produced the match. */
    rule: {
        id: string,
        subId: string,
        description: string,
        urls: {
            value: string
        }[],
        issueType: string,
        category: {
            id: string,
            name: string
        }
    }
}
/**
 * Sends the generated text to the LanguageTool check endpoint and returns the
 * matches it reports.
 *
 * The credentials are read from the `USERNAME_MY_LTEX` and `APIKEY_MY_LTEX`
 * environment variables; an empty string is sent for a variable that is not
 * set.
 *
 * @param res Result of the parse; only `res.text` is submitted.
 * @returns The reported matches, or an empty array when the response carries
 *          no `matches` array.
 * @throws Error when the service answers with a status other than 200. The
 *         error is thrown instead of terminating the process so that the
 *         caller can decide how to recover.
 */
export async function diagnosticsRequests(res: ParseResult): Promise<Match[]> {
    const formData = new URLSearchParams();
    formData.set('text', res.text);
    formData.set('language', 'en-GB');
    formData.set('username', process.env.USERNAME_MY_LTEX ?? '');
    formData.set('apiKey', process.env.APIKEY_MY_LTEX ?? '');
    formData.set('level', 'picky');

    const rawRes = await fetch('https://api.languagetoolplus.com/v2/check', {
        method: 'POST',
        headers: {
            'Accept': 'application/json',
        },
        body: formData,
    });

    if (rawRes.status !== 200) {
        const details = await rawRes.text();
        console.log("Error:" + details)
        throw new Error("The grammar check request failed with status " + rawRes.status + ": " + details);
    }

    const body = await rawRes.json();
    return Array.isArray(body?.matches) ? body.matches : [];
}
/**
 * A diagnostic as returned by getDiagnostics: a `Dialog` (declared in './lsp')
 * extended with the suggested replacements, the id of the rule that fired and,
 * optionally, the word that was flagged.
 */
export type GetDiagnosticsReturn = (Dialog & { replacements: string[], rule_id: string, word?: string });
/**
 * Produces the diagnostics for a LaTeX file.
 *
 * The file is converted to plain text, the text is checked remotely, and every
 * reported match is translated back into a line/character range of the
 * original file. Matches whose position cannot be mapped back are skipped.
 *
 * @param file Content of the LaTeX file.
 * @returns One diagnostic per match that could be mapped back, all reported
 *          with `Severity.Error`.
 */
export async function getDiagnostics(file: string): Promise<GetDiagnosticsReturn[]> {
    const res = parseLsp(file);

    // Debug aid: dump the generated text. This must never prevent the
    // diagnostics from being produced, e.g. when the temporary directory does
    // not exist or is not writable.
    try {
        fs.writeFileSync(path.join(os.tmpdir(), 'latex-lsp-res'), res.text)
    } catch (err: unknown) {
        console.log("Could not write the debug dump: " + (err instanceof Error ? err.message : String(err)))
    }

    const matches = await diagnosticsRequests(res);

    const lineIndex = buildLineIndex(file);

    const diags = [];

    for (const match of matches) {
        const original_position = getOriginalPostion(res, match.offset);
        if (original_position == -1) {
            console.log("Could not find the original position")
            continue;
        }

        // The flagged word is only extracted for matches of the
        // MORFOLOGIK_RULE family.
        let word: string | undefined = undefined;
        if (match.rule.id.startsWith("MORFOLOGIK_RULE")) {
            word = file.substring(original_position, original_position + match.length);
        }

        const [startLine, startChar] = getLineAndChar(lineIndex, original_position);
        const [endLine, endChar] = getLineAndChar(lineIndex, original_position + match.length);

        const range = {
            start: { line: startLine, character: startChar },
            end: { line: endLine, character: endChar },
        }

        diags.push({
            range,
            severity: Severity.Error,
            message: match.message,
            replacements: match.replacements.map(a => a.value),
            rule_id: match.rule.id,
            word,
        })
    }

    return diags;
}