Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -131,4 +131,5 @@ package-lock.json
yarn.lock
dist
*.pdf
*.aux
*.aux
pnpm-lock.yaml
6 changes: 1 addition & 5 deletions jest.config.js
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
module.exports = {
testEnvironment: 'node',
testRegex: 'test\\/.*\\.js$',
testMatch: null,
testURL: 'http://localhost/',
testTimeout: 15000,

testMatch: ['<rootDir>/test/**/*.test.js'],
coverageDirectory: 'coverage',
collectCoverage: true,
};
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
"eslint-plugin-import": "^2.20.2",
"eslint-plugin-jest": "^23.8.2",
"eslint-plugin-prettier": "^3.1.3",
"jest": "^25.4.0",
"jest": "^29.7.0",
"prettier": "^2.0.5",
"rollup": "^2.7.3",
"rollup-plugin-preserve-shebangs": "^0.2.0",
Expand Down
71 changes: 65 additions & 6 deletions src/convert.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,16 @@ import {
bold,
italic,
underline,
strikethrough,
superscript,
subscript,
hyperlink,
divider,
itemize,
enumerate,
item,
image,
escapeLatexSpecialChars
} from './templates';

const pipeline = promisify(pipelineSync);
Expand All @@ -33,6 +38,9 @@ function analyzeForPackageImports(HTMLText) {
if (HTMLText.includes('\\cfrac')) pkgs.push('amsmath');
if (HTMLText.includes('<img')) pkgs.push('graphicx');
if (HTMLText.includes('\\therefore')) pkgs.push('amssymb');
if (HTMLText.includes('<s>')) pkgs.push('ulem');
if (HTMLText.includes('</a>')) pkgs.push('hyperref');
if (HTMLText.includes('</code>')) pkgs.push('listings');

return pkgs;
}
Expand Down Expand Up @@ -84,12 +92,17 @@ async function convertImage(
});
}


function convertPlainText(value, opts) {
const breakReplacement = opts.ignoreBreaks ? '' : '\n\n';
const cleanText = value
.replace(/(\n|\r)/g, breakReplacement) // Standardize line breaks or remove them
.replace(/\t/g, '') // Remove tabs
.replace(/(?<!\\)%/g, '\\%');
// .replace(/\\(?!\\|%|&|_|\$|#|\{|\}|~|\^|<|>|"|\|)/g, '\\textbackslash{}')
.replace(/(\\)([%&#~<>\|])|([%&#~<>\|])/g, escapeLatexSpecialChars);
// Ideally, we would check for all special characters, e.g., /(\\)([%&_$#{}~^<>|"])|([%&_$#{}~^<>|"])/g
// However, we are currently allowing equations to be written in the HTML file.

const decodedText = decodeHTML(cleanText);

return opts.preferDollarInlineMath ? decodedText.replace(/\\\(|\\\)/g, '$') : decodedText;
Expand All @@ -103,17 +116,25 @@ async function convertRichTextSingle(n, opts) {
case 'strong':
return convertRichText(n, opts).then((t) => bold(t));
case 'i':
case 'em':
return convertRichText(n, opts).then((t) => italic(t));
case 'u':
return convertRichText(n, opts).then((t) => underline(t));
case 's':
return convertRichText(n, opts).then((t) => strikethrough(t));
case 'sub':
return convertRichText(n, opts).then((t) => subscript(t));
case 'sup':
return convertRichText(n, opts).then((t) => superscript(t));
case 'br':
return opts.ignoreBreaks ? ' ' : '\n\n';
case 'span':
return convertRichText(n, opts);
case 'a':
return convertRichText(n, opts).then((t) => hyperlink(t, n.attrs.find(({ name }) => name === 'href').value));
case '#text':
return convertPlainText(n.value, opts);
default:
return '';
// we allow unknown tags to pass through
return convertRichText(n, opts);
}
}

Expand Down Expand Up @@ -184,18 +205,21 @@ export async function convert(
'h1',
'h2',
'h3',
'h4',
'h5',
'h6',
'ul',
'ol',
'img',
'hr',
'div',
'section',
'body',
'html',
'header',
'footer',
'aside',
'p',
'table',
'code'
];
const doc = [];
const opts = {
Expand Down Expand Up @@ -235,6 +259,9 @@ export async function convert(
case 'h1':
case 'h2':
case 'h3':
case 'h4':
case 'h5':
case 'h6':
doc.push(convertHeading(n, opts));
break;
case 'ul':
Expand Down Expand Up @@ -284,6 +311,21 @@ export async function convert(
}),
);
break;
case 'table':
if (n.childNodes.length === 0)
break;
if (n.childNodes[0].nodeName == 'tbody')
doc.push(convertTable(n.childNodes[0], opts));
else
doc.push(convertTable(n, opts));
break;
case 'code':
doc.push(
convertRichText(n, opts).then((t) => {
const trimmed = t.trim();
return '\\begin{lstlisting}\n' + trimmed + '\n\\end{lstlisting}';
}),
);
default:
}
});
Expand Down Expand Up @@ -316,3 +358,20 @@ export async function convertFile(filepath, { outputFilepath = filepath, ...opti

await exportFile(processed, outputFilepath, dirname(filepath));
}

/**
 * Render a table node as a fully ruled LaTeX `tabular` with centred columns.
 *
 * Rows are the direct `tr` children of `node`, plus the `tr` children of any
 * direct `thead` / `tbody` / `tfoot` child, so both a `<tbody>` node and a
 * `<table>` node can be passed in.
 *
 * @param {object} node - Parsed node exposing `childNodes`.
 * @param {object} opts - Conversion options, forwarded to `convertTableRow`.
 * @returns {Promise<string>} LaTeX source, or '' when the table has no cells.
 */
async function convertTable(node, opts) {
  const isRow = ({ nodeName }) => nodeName === 'tr';
  const isCell = ({ nodeName }) => nodeName === 'td' || nodeName === 'th';
  const isSection = ({ nodeName }) => ['thead', 'tbody', 'tfoot'].includes(nodeName);

  const rows = [];
  for (const child of Array.from(node.childNodes)) {
    if (isRow(child)) {
      rows.push(child);
    } else if (isSection(child)) {
      rows.push(...Array.from(child.childNodes).filter(isRow));
    }
  }

  // Count cells on the nodes, not on the rendered text: cell text may contain
  // an escaped `\&`, which would be miscounted as a column separator. The
  // widest row wins so ragged tables still get enough columns.
  const columnCount = Math.max(
    0,
    ...rows.map((row) => Array.from(row.childNodes).filter(isCell).length),
  );

  // An empty column specification is not valid LaTeX, so emit nothing instead.
  if (columnCount === 0) return '';

  const processedRows = await Promise.all(rows.map((row) => convertTableRow(row, opts)));

  const rule = '\t\\hline\n\t';
  const columnSpec = `|${'c|'.repeat(columnCount)}`;
  return `\\begin{tabular}{${columnSpec}}\n${rule}${processedRows.join(rule)}${rule}\\end{tabular}`;
}

/**
 * Convert every cell node of a table row to LaTeX text.
 *
 * @param {object[]} cells - Cell nodes to convert.
 * @param {object} opts - Conversion options, handed to `convertRichText` as-is.
 * @returns {Promise<string[]>} Converted cell contents, in input order.
 */
async function processTableCells(cells, opts) {
  // Start all conversions first so they are awaited together.
  const pendingCells = cells.map((cellNode) => convertRichText(cellNode, opts));
  return Promise.all(pendingCells);
}

/**
 * Render one table row as a LaTeX tabular line.
 *
 * Only `td` and `th` children of `row` are treated as cells; any other child
 * node is ignored.
 *
 * @param {object} row - Row node exposing `childNodes`.
 * @param {object} opts - Conversion options, forwarded to `processTableCells`.
 * @returns {Promise<string>} Cells joined by ' & ' and terminated by ' \\' plus a newline.
 */
async function convertTableRow(row, opts) {
  const cellTags = ['td', 'th'];
  const cellNodes = Array.from(row.childNodes).filter(({ nodeName }) => cellTags.includes(nodeName));
  const rendered = await processTableCells(cellNodes, opts);

  // `&` separates columns and `\\` ends the row in a tabular environment.
  return `${rendered.join(' & ')} \\\\\n`;
}
26 changes: 26 additions & 0 deletions src/templates.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ export const subsubsection = (text) => `\\subsubsection*{${text}}`;
// Builds a template that wraps its argument in a one-argument LaTeX command.
const wrapInCommand = (command) => (text) => `\\${command}{${text}}`;
// Builds a template that typesets its argument as an inline math script.
const wrapInMathScript = (operator) => (text) => `$${operator}{${text}}$`;

// Inline text styles.
export const bold = wrapInCommand('textbf');
export const italic = wrapInCommand('textit');
export const underline = wrapInCommand('underline');
export const strikethrough = wrapInCommand('sout');
export const superscript = wrapInMathScript('^');
export const subscript = wrapInMathScript('_');

// Link with `text` as the visible label and `url` as the target.
export const hyperlink = (text, url) => ['\\href{', '}{', '}'].reduce(
  (out, piece, i) => out + piece + (i === 0 ? `${url}` : i === 1 ? `${text}` : ''),
  '',
);

export const divider = nls('\\hrule');

Expand Down Expand Up @@ -54,3 +59,24 @@ export function beginDocument({ title, includeDate = false, author } = {}) {

export const endDocument = nlp('\\end{document}');
export const docClass = (className) => `\\documentclass{${className}}`;

// LaTeX replacement for each special character. Kept in a Map so a lookup can
// never fall through to `Object.prototype`, and built once, not per match.
const LATEX_SPECIAL_CHAR_ESCAPES = new Map([
  ['\\', '\\textbackslash{}'],
  ['{', '\\{'],
  ['}', '\\}'],
  ['%', '\\%'],
  ['$', '\\$'],
  ['&', '\\&'],
  ['#', '\\#'],
  ['^', '\\^{}'],
  ['_', '\\_'],
  ['~', '\\textasciitilde{}'],
  ['|', '\\textbar{}'],
]);

/**
 * Replacement callback for `String.prototype.replace` that escapes LaTeX
 * special characters.
 *
 * @param {string} match - The matched text.
 * @param {string} [p1] - First capture group; when set, the character was
 *   already preceded by a backslash and the match is returned untouched.
 * @returns {string} The escaped form, or `match` itself when no escape is defined.
 */
export const escapeLatexSpecialChars = (match, p1) => {
  if (p1) return match;

  return LATEX_SPECIAL_CHAR_ESCAPES.has(match) ? LATEX_SPECIAL_CHAR_ESCAPES.get(match) : match;
};
Loading