latex-lsp/parser.ts

import * as fs from 'fs';

import process from 'process';
import { Severity, type Dialog } from './lsp';
import { OperationCanceledException } from 'typescript';


/**
 * One entry of the map between the generated plain text and the LaTeX
 * source it was produced from.
 */
type ParseResultConversion = {
    /** Offset of the span inside the generated plain text. */
    position: number;
    /** Span length in the generated text, as recorded by the code that produced the entry. */
    length: number;
    /** Offset inside the original LaTeX source that `position` corresponds to. */
    original_position: number;
    /** Number of source characters the span replaced; only set when a command was substituted. */
    original_length?: number;
    /** How the span should be treated: body text or a first/second level heading. */
    type: 'text' | 'h1' | 'h2';
};

/** Output of the LaTeX-to-plain-text conversion. */
type ParseResult = {
    /** The generated plain text. */
    text: string;
    /** The untouched LaTeX input the text was generated from. */
    originalString: string;
    /** Span map from `text` back to `originalString`, appended in generation order. */
    conversions: ParseResultConversion[];
};

/**
 * Measures how far a comment that starts at `curPos` extends.
 *
 * @param text   Full source text.
 * @param curPos Index of the character that opens the comment.
 * @returns The distance from `curPos` to the first `\n` found after it, or
 *          to the end of `text` when no newline follows. Adding the result
 *          to `curPos` lands on the newline itself, so a caller that then
 *          advances by one character skips it.
 */
function parseComment(text: string, curPos: number): number {
    const newlineAt = text.indexOf('\n', curPos + 1);
    const stopAt = newlineAt === -1 ? text.length : newlineAt;
    return stopAt - curPos;
}

/**
 * Turns the plain-text span `text[startPos..curPos - ignoreLast]` (both ends
 * inclusive) into output text plus the conversion entries that map it back
 * to the source.
 *
 * A span without newlines is emitted verbatim as a single entry. A span with
 * newlines is split per line: leading whitespace of every line is dropped
 * and each line is emitted followed by `\n`, with one entry per line.
 * In both cases the recorded `length` is one less than the number of emitted
 * characters, i.e. `position + length` is the offset of the last character.
 *
 * @param text       Full source text.
 * @param startPos   Index of the first character of the span.
 * @param curPos     Index of the last character of the span (inclusive).
 * @param result     Output generated so far; its length is the position of the new text.
 * @param ignoreLast Number of trailing characters to cut off the span.
 * @returns `[conversions, textToAppend]`, or `null` when the span is empty
 *          or consists only of whitespace.
 */
function createPartition(text: string, startPos: number, curPos: number, result: string, ignoreLast: number = 0): [ParseResultConversion[], string] | null {
    const endPos = curPos - ignoreLast;

    // `endPos` is inclusive, so `startPos === endPos` is a valid one-character
    // span; only a start strictly past the end means there is nothing to emit.
    // This guard must run before `substring`, which would swap reversed bounds.
    if (startPos > endPos) {
        return null;
    }

    const chunk = text.substring(startPos, endPos + 1);
    if (/^\s*$/.test(chunk)) {
        return null;
    }

    if (!chunk.includes('\n')) {
        return [[{
            length: endPos - startPos,
            position: result.length,
            original_position: startPos,
            type: 'text',
        }], chunk];
    }

    let emitted = "";
    const convs: ParseResultConversion[] = [];

    let sourceOffset = startPos;
    let outputOffset = result.length;

    for (const line of chunk.split('\n')) {
        const trimmed = line.replace(/^\s*/, '');
        // Skip over the indentation that is not copied to the output.
        sourceOffset += line.length - trimmed.length;
        emitted += trimmed + '\n';
        convs.push({
            length: trimmed.length,
            original_position: sourceOffset,
            position: outputOffset,
            type: 'text'
        });
        // +1 accounts for the newline on both sides of the mapping.
        outputOffset += trimmed.length + 1;
        sourceOffset += trimmed.length + 1;
    }

    return [convs, emitted];
}

/**
 * Tells whether a character code may be part of a command name.
 *
 * Accepted codes: 65–90 (`A`–`Z`), 92–122 (backslash, `]`, `^`, `_`,
 * backtick and `a`–`z`) and 42 (`*`).
 *
 * NOTE(review): the lower range starts at 92 rather than 97, so five
 * non-letter characters are accepted as well — confirm whether that is
 * intended before narrowing it, since callers currently rely on it.
 */
function isChar(charCode: number): boolean {
    const inLowerRange = charCode >= 92 && charCode <= 122;
    const isUpperCase = charCode >= 65 && charCode <= 90;
    const isStar = charCode === 42;
    return inLowerRange || isUpperCase || isStar;
}
/**
 * Finds the closing delimiter that matches an opening delimiter which has
 * already been consumed by the caller.
 *
 * @param endChar   Closing delimiter, e.g. `}`.
 * @param startChar Opening delimiter, e.g. `{`; nested pairs are skipped.
 * @param text      Full source text.
 * @param curPos    Index of the first character after the opening delimiter.
 * @returns Number of characters from `curPos` up to and including the
 *          matching closing delimiter.
 * @throws Error when the text ends before the delimiter is closed.
 */
function readBalanced(endChar: string, startChar: string, text: string, curPos: number): number {
    // The caller already consumed one opening delimiter.
    let depth = 1;
    for (let distance = 0; curPos + distance < text.length; distance++) {
        const current = text[curPos + distance];
        if (current !== endChar) {
            if (current === startChar) {
                depth += 1;
            }
            continue;
        }
        depth -= 1;
        if (depth === 0) {
            return distance + 1;
        }
    }
    throw new Error("Can not find end of balance read")
}

/**
 * Tells whether `char` is a whitespace character that ends a command.
 *
 * Carriage return is included so that files with CRLF line endings are
 * handled the same way as files with LF line endings.
 */
function isWhiteSpace(char: string): boolean {
    return char === ' ' || char === '\t' || char === '\n' || char === '\r';
}

/**
 * Parses the LaTeX command whose backslash sits at `curPos`.
 *
 * The first element of the returned tuple is the distance from `curPos` to
 * the last source character that belongs to the command. The caller adds it
 * to its index and then steps one character further, so the character right
 * after that position is the next one to be looked at.
 *
 * What counts as consumed:
 * - escapes (`\\`, `\%`, `\_`): the backslash and the escaped character;
 * - a command ended by whitespace: the command and that one whitespace
 *   character;
 * - a command ended by anything else (the backslash of the next command,
 *   `.`, `,`, a letter after the arguments): only the command itself, the
 *   terminating character is left for the caller;
 * - a command that runs up to the end of `text`: everything up to the end;
 * - `\begin{verbatim}`: everything up to and including `\end{verbatim}`
 *   plus the single character that follows it.
 *
 * @param text   Full source text.
 * @param curPos Index of the backslash that starts the command.
 * @param result Output generated so far; its length is the position of any
 *               text this command contributes.
 * @returns `[len]` when the command contributes no text, otherwise
 *          `[len, conversion, textToAppend]`.
 * @throws Error when the backslash is the last character of `text`, when an
 *         option/argument is not closed, when an unsupported character or
 *         command is met, when a heading has no argument, or when a
 *         verbatim environment is never closed.
 */
function parseCommand(text: string, curPos: number, result: string): [number, ParseResultConversion, string] | [number] {

    if (text.length - 1 == curPos) {
        throw new Error("The latex file has the wrong format the file can not end with a empty command");
    }

    const escaped = text[curPos + 1];
    if (escaped === '\\' || escaped === '%' || escaped === '_') {
        // Only the escaped character itself is consumed: returning 1 puts the
        // caller on it, so the character that follows is not skipped.
        return [1, {
            length: 1,
            position: result.length,
            original_position: curPos + 1,
            type: 'text'
        }, escaped];
    }

    let commandName = "";
    let commandNameFinished = false;

    // TODO store the location of the opts and args
    const options: string[] = [];
    const args: string[] = [];

    // Index of the last source character consumed by the command. If the loop
    // runs off the end of the text the command simply extends to the end.
    let lastConsumed = text.length - 1;

    for (let i = curPos + 1; i < text.length; i++) {
        const char = text[i];
        if (char === '\\') {
            // The next command starts here; leave its backslash to the caller.
            lastConsumed = i - 1;
            break;
        } else if (isChar(char.charCodeAt(0))) {
            if (!commandNameFinished) {
                commandName += char;
            } else {
                // Text directly after the arguments is not part of the command.
                lastConsumed = i - 1;
                break;
            }
        } else if (char === '[') {
            commandNameFinished = true;
            const optionLen = readBalanced(']', '[', text, i + 1);
            options.push(text.substring(i + 1, i + optionLen))
            // Lands on the closing bracket; the loop increment steps past it.
            i += optionLen;
        } else if (char === '{') {
            commandNameFinished = true;
            const argLen = readBalanced('}', '{', text, i + 1);
            args.push(text.substring(i + 1, i + argLen))
            // Lands on the closing brace; the loop increment steps past it.
            i += argLen;
        } else if (isWhiteSpace(char)) {
            // The whitespace that ends a command is consumed with it.
            lastConsumed = i;
            break;
        } else {

            if ((char == '.' || char == ',') && (commandNameFinished || commandName.length > 0)) {
                // Punctuation ends the command but stays in the text.
                lastConsumed = i - 1;
                break;
            }

            console.log(text.substring(i - 20, i + 20));
            console.log('Char:' + char.charCodeAt(0));
            throw new Error("TODO handle not char chars in the parse command function");
        }
    }

    let len = lastConsumed - curPos;

    switch (commandName) {
        case 'documentclass':
        case 'usepackage':
        case 'graphicspath':
        case 'hypersetup':
        case 'pagestyle':
        case 'fancyhead':
        case 'fancyfoot':
        case 'renewcommand':
        case 'setlength':
        case 'addbibresource':
        case 'date':
        case 'maketitle':
        case 'newpage':
        case 'tableofcontents':
        case 'includegraphics':
        case 'appendix':
        case 'printbibliography':
        case 'vspace*':
        case 'pagebreak':
        case 'today':
        case 'label':
            return [len];

        case 'title':
        case 'author':
        case 'end':
        case 'ref':
        case 'caption':
        case 'footnote':
            console.log("TODO: add way to check the " + commandName)
            return [len];

        case 'cite': {
            console.log("TODO check if it exists on the bibliography");
            console.log(`Find cite for '${args[0]}'`);

            // Placeholder that stands in for the rendered citation.
            const toAdd = "[0]"

            return [len, {
                length: toAdd.length,
                original_length: len,
                original_position: curPos,
                position: result.length,
                type: 'text'
            }, toAdd];
        }

        case 'begin':

            switch (args[0]) {

                case 'verbatim': {
                    // Verbatim content is not LaTeX, so skip it wholesale.
                    const endMarker = '\\end{verbatim}';
                    const markerAt = text.indexOf(endMarker, curPos);

                    if (markerAt === -1) {
                        // Without this check the length would be computed from -1
                        // and could move the caller backwards in the text.
                        throw new Error("Could not find the end of the verbatim environment");
                    }

                    len = markerAt + endMarker.length - curPos;
                    break
                }
                default:
                    console.log("Do not know how to handle " + args[0])
            }


            return [len];

        case 'item': {
            const itemLabel = args[0];

            if (itemLabel) {
                return [len, {
                    length: 2 + itemLabel.length + 1,
                    original_length: len,
                    original_position: curPos + commandName.length,
                    position: result.length,
                    type: 'text'
                }, "— " + itemLabel + '\n'];
            }

            return [len, {
                length: 2,
                original_length: len,
                original_position: curPos,
                position: result.length,
                type: 'text'
            }, "— "];
        }

        case 'section*':
        case 'section':
        case 'subsection*':
        case 'subsection': {
            const heading = args[0];

            if (heading === undefined) {
                throw new Error("The " + commandName + " command is missing its title argument");
            }

            const isSubsection = commandName === 'subsection' || commandName === 'subsection*';

            return [len, {
                length: heading.length + 1,
                original_length: len,
                // Skips the backslash, the command name and the opening brace.
                original_position: curPos + 2 + commandName.length,
                position: result.length,
                type: isSubsection ? 'h2' : 'h1',
            }, heading + '\n']
        }

        default:
            console.log("Command name: " + commandName + " options: " + options + " args: " + args);
            throw new Error("TODO handle this case " + commandName);
    }


}

/**
 * Measures the distance from `curPos` to the next occurrence of `char`.
 *
 * @param text   Full source text.
 * @param char   Single character to look for.
 * @param curPos Index at which the search starts (inclusive).
 * @returns Number of characters between `curPos` and the match; `0` when the
 *          character at `curPos` already matches.
 * @throws Error when `char` does not occur at or after `curPos`.
 */
function readUntil(text: string, char: string, curPos: number): number {
    let cursor = curPos;
    while (cursor < text.length) {
        if (text[cursor] === char) {
            return cursor - curPos;
        }
        cursor += 1;
    }

    throw new Error("Could not find matching pair");
}

/**
 * Converts LaTeX source into plain text suitable for a grammar checker and
 * records, for every emitted span, where it came from in the source.
 *
 * The text is scanned once. Plain text between special tokens is copied
 * through `createPartition`; the special tokens are handled as follows:
 * - `%` starts a comment, which is dropped up to and including the newline;
 * - `\` starts a command, handled by `parseCommand`;
 * - `$ … $` is replaced by the placeholder word `mathexpr`;
 * - a doubled backtick or a doubled apostrophe becomes a typographic
 *   opening or closing double quote.
 * Plain text that follows the last special token is emitted as well.
 *
 * `parseComment` and `parseCommand` return the distance to the last character
 * they consumed; the loop increment then moves on to the character after it.
 *
 * @param text LaTeX source.
 * @returns The generated text, the original source and the span map.
 * @throws Error for `$$`, for an unclosed `$`, and for anything
 *         `parseCommand` rejects.
 */
export function parseLsp(text: string): ParseResult {
    const result: ParseResult = {
        text: '',
        originalString: text,
        conversions: [],
    };

    // Index of the first source character that has not been emitted yet.
    let conversionStartPosition = 0;

    // Emits the pending plain text up to `lastIndex` (inclusive), if any, and
    // hands back what `createPartition` produced.
    const flushPendingText = (lastIndex: number) => {
        const possiblePartition = createPartition(text, conversionStartPosition, lastIndex, result.text);

        if (possiblePartition) {
            const [conv, toAdd] = possiblePartition;
            result.conversions = result.conversions.concat(conv);
            result.text += toAdd;
        }

        return possiblePartition;
    };

    // Appends replacement text that stands in for the source token at `originalPosition`.
    const emitReplacement = (toAdd: string, originalPosition: number): void => {
        result.conversions.push({
            length: toAdd.length,
            position: result.text.length,
            original_position: originalPosition,
            type: 'text',
        });
        result.text += toAdd;
    };

    for (let i = 0; i < text.length; i++) {
        const char = text[i];

        if (char === '%') {
            flushPendingText(i - 1);

            // Lands on the newline that ends the comment.
            i += parseComment(text, i);

            // Skip the newline itself.
            conversionStartPosition = i + 1;

        } else if (char === '\\') {
            const possiblePartition = flushPendingText(i - 1);

            console.log(possiblePartition)

            const res = parseCommand(text, i, result.text);

            if (res.length === 1) {
                const [len] = res;
                i += len;
            } else {
                const [len, conv, toAdd] = res;

                result.conversions.push(conv);
                result.text += toAdd;
                i += len;
            }

            conversionStartPosition = i + 1;
        } else if (char === '$') {
            console.log('Found math expr')
            if (text[i + 1] === '$') {
                throw new Error("Handle double math expression");
            }

            flushPendingText(i - 1);

            // Distance from the character after the opening `$` to the closing `$`.
            const len = readUntil(text, '$', i + 1);

            // Whitespace after the closing `$` is ordinary text and is emitted
            // with the next partition, so the placeholder carries none itself.
            emitReplacement('mathexpr', i);

            // Lands on the closing `$`.
            i += len + 1;

            conversionStartPosition = i + 1;
        } else if (char == '`' || char == "'") {
            console.log('Found coutes')
            if (text[i + 1] !== char) {
                // A single backtick/apostrophe is ordinary text.
                continue;
            }

            flushPendingText(i - 1);

            emitReplacement(char == '`' ? "“" : "”", i);

            // Lands on the second character of the pair.
            i += 1;

            conversionStartPosition = i + 1;
        }

    }

    // Text after the last special token has not been emitted by the loop.
    flushPendingText(text.length - 1);

    result.text = result.text.replace(/``/g, "''");

    return result;
}

/**
 * Converts an absolute offset into a zero-based `[line, character]` pair.
 *
 * @param lineIndex Ascending list of offsets at which lines start.
 * @param offset    Absolute offset to locate.
 * @returns The index of the last entry that is not greater than `offset`,
 *          together with the distance from that entry to `offset`; an offset
 *          that sits exactly on an entry therefore yields character `0` of
 *          that line. `[-1, -1]` when no entry is less than or equal to
 *          `offset` (including an empty `lineIndex`).
 */
function getLineAndChar(lineIndex: number[], offset: number): [number, number] {
    let low = 0;
    let high = lineIndex.length - 1;
    let line = -1;

    // Binary search for the right-most line start that is <= offset.
    while (low <= high) {
        const mid = Math.floor((low + high) / 2);

        if (lineIndex[mid] <= offset) {
            line = mid;
            low = mid + 1;
        } else {
            high = mid - 1;
        }
    }

    if (line === -1) {
        return [-1, -1];
    }

    return [line, offset - lineIndex[line]];
}


/**
 * Maps an offset in the generated text back to an offset in the source.
 *
 * Binary-searches `res.conversions`, which are appended in ascending
 * `position` order, for an entry with
 * `position <= offset <= position + length`.
 *
 * @param res    Parse result holding the span map.
 * @param offset Offset inside `res.text`.
 * @returns The entry's `original_position` plus the distance of `offset`
 *          from the entry's `position`, or `-1` when no entry covers
 *          `offset` (including when there are no entries at all).
 */
function getOriginalPostion(res: ParseResult, offset: number): number {
    let low = 0;
    let high = res.conversions.length;

    while (high >= low) {
        const mid = Math.floor((high + low) / 2);
        const conv = res.conversions[mid];

        // `mid` can reach `conversions.length`, one past the last entry;
        // there is nothing there, so keep searching to the left.
        if (conv === undefined || conv.position > offset) {
            high = mid - 1;
            continue;
        }

        if (conv.position + conv.length < offset) {
            low = mid + 1;
            continue;
        }

        return conv.original_position + (offset - conv.position);
    }

    return -1;
}

/**
 * Builds the list of offsets at which the lines of `file` start.
 *
 * @param file Text whose lines are separated by `\n`.
 * @returns An array that begins with `0` and, for every line, contains the
 *          offset just past that line and its separator. It therefore has
 *          one more entry than there are lines; the final entry is
 *          `file.length + 1`.
 */
function buildLineIndex(file: string) {
    const starts = [0];
    let cursor = 0;
    file.split('\n').forEach((segment) => {
        // +1 steps over the newline that `split` removed.
        cursor += segment.length + 1;
        starts.push(cursor);
    });
    return starts;
}
/**
 * Shape of one entry of the `matches` array in the grammar-check response,
 * as far as this file relies on it.
 */
type Match = {
    /** Description of the problem; used as the diagnostic message. */
    message: string;
    shortMessage: string;
    /** Offset of the problem inside the text that was submitted. */
    offset: number;
    /** Length of the problematic span. */
    length: number;
    /** Suggested replacements for the span. */
    replacements: {
        value: string;
    }[];
    context: {
        text: string;
        offset: number;
        length: number;
    };
    sentence: string;
    /** The rule that produced the match. */
    rule: {
        id: string;
        subId: string;
        description: string;
        urls: {
            value: string;
        }[];
        issueType: string;
        category: {
            id: string;
            name: string;
        };
    };
};


/**
 * Submits the generated plain text to the grammar-check HTTP endpoint.
 *
 * The request is a form-encoded POST carrying the text, the language
 * `en-GB`, the level `picky`, and the credentials read from the
 * `USERNAME_MY_LTEX` and `APIKEY_MY_LTEX` environment variables (empty
 * strings when unset).
 *
 * Any status other than 200 is logged together with the response body and
 * terminates the process with exit code 2.
 *
 * @param res Parse result whose `text` is checked.
 * @returns The `matches` array of the JSON response.
 */
export async function diagnosticsRequests(res: ParseResult): Promise<Match[]> {
    const endpoint = 'https://api.languagetoolplus.com/v2/check';

    const formData = new URLSearchParams({
        text: res.text,
        language: 'en-GB',
        username: process.env.USERNAME_MY_LTEX ?? '',
        apiKey: process.env.APIKEY_MY_LTEX ?? '',
        level: 'picky',
    });

    const rawRes = await fetch(endpoint, {
        method: 'POST',
        headers: { 'Accept': 'application/json' },
        body: formData,
    });

    if (rawRes.status !== 200) {
        const failureBody = await rawRes.blob();
        const failureText = await failureBody.text();
        console.log("Error:" + failureText)
        process.exit(2);
    }

    const { matches } = await rawRes.json();

    return matches;
}

/**
 * A diagnostic enriched with the data needed to offer fixes: the suggested
 * replacements, the id of the rule that fired and, when it was extracted,
 * the offending word from the source.
 */
export type GetDiagnosticsReturn = Dialog & {
    replacements: string[];
    rule_id: string;
    word?: string;
};

/**
 * Produces diagnostics for a LaTeX document.
 *
 * The document is converted to plain text (which is also written to
 * `/tmp/latex-lsp-res`), the text is sent to the grammar checker, and every
 * match is mapped back to a line/character range in the original document.
 * Matches whose offset cannot be mapped back are logged and skipped.
 *
 * @param file Content of the LaTeX document.
 * @returns One diagnostic per mappable match, all with error severity. For
 *          rules whose id starts with `MORFOLOGIK_RULE` the flagged word is
 *          included as well.
 */
export async function getDiagnostics(file: string): Promise<GetDiagnosticsReturn[]> {

    const parsed = parseLsp(file);

    fs.writeFileSync('/tmp/latex-lsp-res', parsed.text)

    const matches = await diagnosticsRequests(parsed);

    const lineStarts = buildLineIndex(file);

    const diags = [];

    for (const match of matches) {
        const sourceStart = getOriginalPostion(parsed, match.offset);
        if (sourceStart == -1) {
            console.log("Could not find the original position")
            continue;
        }

        // The match length is applied unchanged to the source offset.
        const sourceEnd = sourceStart + match.length;

        const word = match.rule.id.startsWith("MORFOLOGIK_RULE")
            ? file.substring(sourceStart, sourceEnd)
            : undefined;

        const [startLine, startChar] = getLineAndChar(lineStarts, sourceStart);
        const [endLine, endChar] = getLineAndChar(lineStarts, sourceEnd);

        diags.push({
            range: {
                start: { line: startLine, character: startChar },
                end: { line: endLine, character: endChar },
            },
            severity: Severity.Error,
            message: match.message,
            replacements: match.replacements.map(({ value }) => value),
            rule_id: match.rule.id,
            word,
        })
    }

    return diags;
}