import process, { stdout } from 'node:process';
import fs from 'fs/promises';
import type { FileHandle } from 'node:fs/promises';
import nPath from 'node:path';

/** A LaTeX command such as `\section[short]{Long}` together with its argument groups. */
type Command = {
  type: 'command';
  /** Offset of the leading backslash in the source string. */
  start: number;
  /** Raw source text of the command, including its argument groups. */
  text: string;
  /** Command name without the leading backslash (e.g. `section*`). */
  name: string;
  /** Parsed contents of each `[...]` group, in source order. */
  options: Text[];
  /** Parsed contents of each `{...}` group, in source order. */
  inner: Text[];
};

/** A `%` comment, including the terminating newline when there is one. */
type Comment = {
  type: 'comment';
  start: number;
  text: string;
};

/** A run of plain text between commands and comments. */
type Range = {
  type: 'range';
  start: number;
  text: string;
};

type Token = Command | Range | Comment;

/** A token sequence parsed from the source, starting at offset `start`. */
type Text = {
  start: number;
  tokens: Token[];
};

/** Characters that may appear in a command name. */
const COMMAND_NAME_CHAR = /[a-zA-Z_\\*]/;

/**
 * Reads a `%` comment that begins at `start`.
 *
 * @param data - Full source string.
 * @param start - Offset of the `%` character.
 * @returns The comment token and `i`, the offset of the last consumed character
 *   (the newline, or the last character of `data` when no newline follows).
 */
function processComment(data: string, start: number): { comment: Comment; i: number } {
  const newline = data.indexOf('\n', start);
  const end = newline === -1 ? data.length - 1 : newline;
  const comment: Comment = {
    type: 'comment',
    start,
    text: data.substring(start, end + 1),
  };
  return { comment, i: end };
}

/** Reports whether `test` contains a character that is allowed in a command name. */
function isValid(test: string): boolean {
  return COMMAND_NAME_CHAR.test(test);
}

/**
 * Reads the command whose backslash sits at `start`, followed by every directly
 * adjacent `[...]` and `{...}` argument group.
 *
 * @param data - Full source string.
 * @param start - Offset of the leading backslash.
 * @returns The command token and `i`, the offset of the last consumed character.
 * @throws Error when an argument group is never closed.
 */
function processCommand(data: string, start: number): { command: Command; i: number } {
  let i = start + 1;
  if (data.charAt(i) === '\\') {
    // `\\` is a command on its own; the second backslash is its whole name.
    i++;
  } else {
    // A backslash after the first name character starts the next command, so it
    // must end this name instead of being merged into it.
    while (i < data.length && data.charAt(i) !== '\\' && isValid(data.charAt(i))) {
      i++;
    }
  }

  const command: Command = {
    type: 'command',
    start,
    text: '',
    name: data.substring(start + 1, i),
    options: [],
    inner: [],
  };

  // Collect all argument groups, not just the first one.
  while (i < data.length) {
    const opener = data.charAt(i);
    if (opener !== '[' && opener !== '{') break;
    const { text, i: next } = processText(data, i, opener === '[' ? ']' : '}', true);
    (opener === '[' ? command.options : command.inner).push(text);
    i = next; // processText reports the offset just past the closing delimiter
  }

  command.text = data.substring(start, i);
  return { command, i: i - 1 };
}

/**
 * Splits `data` into plain-text ranges, comments and commands.
 *
 * Known limitation: bare nested groups are not balanced, so a `}` inside a
 * `{...}` argument that does not belong to a nested command ends the argument.
 *
 * @param data - Full source string.
 * @param start - Offset at which to start reading.
 * @param delimiter - Character that ends the text; empty to read to the end of `data`.
 * @param exclude - When true, `data[start]` (the opening delimiter) is skipped and
 *   the closing delimiter is left out of the result.
 * @returns The parsed text and `i`: the offset just past the closing delimiter,
 *   or `data.length - 1` when the end of `data` was reached.
 * @throws Error when `delimiter` is given but never found.
 */
function processText(
  data: string,
  start: number = 0,
  delimiter: string = "",
  exclude = false,
): { text: Text; i: number } {
  const text: Text = { start, tokens: [] };
  const first = exclude ? start + 1 : start;
  let range: Range = { type: 'range', start: first, text: '' };

  // Emits the pending plain-text range, skipping empty ones.
  const flush = (): void => {
    if (range.text.length > 0) text.tokens.push(range);
  };

  for (let i = first; i < data.length; i++) {
    const char = data.charAt(i);

    if (delimiter === char) {
      if (!exclude) range.text += char;
      flush();
      return { text, i: i + 1 };
    }

    if (char === '%') {
      flush();
      const { comment, i: last } = processComment(data, i);
      text.tokens.push(comment);
      i = last;
      range = { type: 'range', start: i + 1, text: '' };
      continue;
    }

    if (char === '\\') {
      flush();
      const { command, i: last } = processCommand(data, i);
      text.tokens.push(command);
      i = last;
      range = { type: 'range', start: i + 1, text: '' };
      continue;
    }

    range.text += char;
  }

  if (delimiter) {
    throw new Error(`Delimiter '${delimiter}' not found for group starting at offset ${start}`);
  }

  // Text after the last command/comment must not be dropped.
  flush();
  return { text, i: data.length - 1 };
}

/** Aborts the conversion with `message`, appending a dump of `token` when given. */
function fail(message: string, token?: Token): never {
  throw new Error(token ? `${message}\n${JSON.stringify(token, null, 2)}` : message);
}

/** Text of the first token of the first `{...}` argument, if there is one. */
function argumentText(command: Command): string | undefined {
  return command.inner[0]?.tokens[0]?.text;
}

/** Like `argumentText`, but aborts the conversion when the argument is missing or empty. */
function requireArgument(command: Command): string {
  const argument = argumentText(command);
  if (!argument) fail(`Missing argument for \\${command.name}`, command);
  return argument;
}

/**
 * Command-line entry point: converts the LaTeX file named by the first argument
 * and writes the result to `results.txt` in the current working directory.
 *
 * Usage errors are printed to stderr and reported through `process.exitCode`
 * so that pending output is flushed before the process ends.
 */
async function main(): Promise<void> {
  const path = process.argv[2];
  if (path === undefined) {
    console.error("Not enough arguments");
    process.exitCode = 1;
    return;
  }

  const basePath = nPath.dirname(path);
  const stat = await fs.stat(path);
  if (!stat.isFile()) {
    console.error(`'${path}' is not a file`);
    process.exitCode = 1;
    return;
  }

  const data = await fs.readFile(path, 'utf8');
  const { text } = processText(data);
  const file = await fs.open('results.txt', 'w');
  try {
    await printText(text, file, basePath);
  } finally {
    // Close the handle even when the conversion aborts half-way.
    await file.close();
  }
}

/**
 * Writes the body of an `itemize` environment as a `- ` bullet list.
 *
 * @param text - Token sequence that contains the environment.
 * @param file - Output file.
 * @param start - Index of the first token after `\begin{itemize}`.
 * @returns Index of the matching `\end{itemize}` token.
 * @throws Error on any command other than `\item` / `\end{itemize}`, or when the
 *   environment is never closed.
 */
async function printItemize(text: Text, file: FileHandle, start: number): Promise<number> {
  for (let i = start; i < text.tokens.length; i++) {
    const token = text.tokens[i];
    switch (token.type) {
      case 'range':
        // Writes are awaited so they reach the file in source order.
        await file.write(token.text);
        break;
      case 'comment':
        break;
      case 'command':
        if (token.name === 'item') {
          await file.write('- ');
          break;
        }
        if (token.name === 'end' && argumentText(token) === 'itemize') {
          return i;
        }
        fail('Do not know how to handle!', token);
    }
  }
  throw new Error('Did not find end at itemize!');
}

/**
 * Writes the tokens of `text` to `file`, translating the supported commands.
 *
 * Only the first token of a command argument is used (e.g. as heading text).
 *
 * @param text - Token sequence to print.
 * @param file - Output file.
 * @param basePath - Directory against which `\input` paths are resolved.
 * @param foundStart - When false, everything up to and including
 *   `\begin{document}` is skipped.
 * @param start - Index of the first token to look at.
 * @throws Error on a command or environment that is not supported.
 */
async function printText(
  text: Text,
  file: FileHandle,
  basePath: string,
  foundStart: boolean = false,
  start: number = 0,
): Promise<void> {
  for (let i = start; i < text.tokens.length; i++) {
    const token = text.tokens[i];

    if (!foundStart) {
      foundStart =
        token.type === 'command' &&
        token.name === 'begin' &&
        argumentText(token) === 'document';
      continue;
    }

    if (token.type === 'range') {
      await file.write(token.text);
      continue;
    }
    if (token.type === 'comment') {
      continue;
    }

    switch (token.name) {
      // Commands without a plain-text representation are dropped.
      case 'includepdf':
      case 'maketitle':
      case 'newpage':
      case 'tableofcontents':
      case 'printbibliography':
      case 'supercite':
      case 'includegraphics':
      case 'vspace*':
        break;

      case 'cref':
        await file.write('Fig. 1');
        break;

      case 'section':
      case 'section*':
        await file.write('# ' + requireArgument(token) + '\n');
        break;

      case 'subsection':
      case 'subsection*':
        await file.write('## ' + requireArgument(token) + '\n');
        break;

      case 'subsubsection':
      case 'subsubsection*':
        // One level below `subsection`.
        await file.write('### ' + requireArgument(token) + '\n');
        break;

      case 'input': {
        const name = requireArgument(token);
        const fileName = name.endsWith('.tex') ? name : name + '.tex';
        const included = await fs.readFile(nPath.join(basePath, fileName), 'utf8');
        // Included files have no preamble, so printing starts right away.
        await printText(processText(included).text, file, basePath, true);
        break;
      }

      case 'begin': {
        const environment = requireArgument(token);
        switch (environment) {
          case 'figure':
          case 'minipage':
          case 'tabularx':
            // Skip the whole environment.
            i = findEnd(environment, text, i);
            break;
          case 'itemize':
            i = await printItemize(text, file, i + 1);
            break;
          default:
            fail('Do not know how to handle begin ' + environment, token);
        }
        break;
      }

      case 'end': {
        const environment = argumentText(token);
        if (environment !== 'document') {
          fail('Do not know how to handle end ' + String(environment), token);
        }
        break;
      }

      case 'verb': {
        // The tokenizer leaves `<delim>verbatim<delim>rest` in the next token.
        const nextToken = text.tokens[i + 1];
        const raw = nextToken?.text;
        if (!raw) fail('Something wrong!', token);
        const closing = raw.indexOf(raw.charAt(0), 1);
        const end = closing === -1 ? raw.length : closing;
        i++; // the next token has been consumed here
        await file.write(raw.substring(1, end));
        await file.write(raw.substring(end + 1));
        break;
      }

      default:
        fail("Don't know how to handle " + token.name, token);
    }
  }
}

/**
 * Finds the `\end{target}` that closes an environment, taking nested
 * environments of the same name into account.
 *
 * @param target - Environment name.
 * @param text - Token sequence to search.
 * @param start - Index at which to start; may be the `\begin{target}` token itself.
 * @returns Index of the closing `\end{target}` token.
 * @throws Error when no closing token is found.
 */
function findEnd(target: string, text: Text, start: number): number {
  let depth = 0;
  for (let i = start; i < text.tokens.length; i++) {
    const token = text.tokens[i];
    if (token.type !== 'command' || argumentText(token) !== target) {
      continue;
    }
    if (token.name === 'begin') {
      depth++;
    } else if (token.name === 'end') {
      depth--;
      // `<= 0` also covers callers that start after the opening `\begin`.
      if (depth <= 0) return i;
    }
  }
  throw new Error('Could not find end for ' + target);
}

main().catch((err: unknown) => {
  // Report the failure instead of leaving the rejection unhandled.
  console.error(err instanceof Error ? err.message : err);
  process.exitCode = 1;
});