diff --git a/.gitignore b/.gitignore index 7133ef1..b0d28c5 100644 --- a/.gitignore +++ b/.gitignore @@ -131,4 +131,5 @@ package-lock.json yarn.lock dist *.pdf -*.aux \ No newline at end of file +*.aux +pnpm-lock.yaml \ No newline at end of file diff --git a/jest.config.js b/jest.config.js index a8a5f23..2088814 100644 --- a/jest.config.js +++ b/jest.config.js @@ -1,10 +1,6 @@ module.exports = { testEnvironment: 'node', - testRegex: 'test\\/.*\\.js$', - testMatch: null, - testURL: 'http://localhost/', - testTimeout: 15000, - + testMatch: ['/test/**/*.test.js'], coverageDirectory: 'coverage', collectCoverage: true, }; diff --git a/package.json b/package.json index 9bf0d63..97fbb47 100644 --- a/package.json +++ b/package.json @@ -23,7 +23,7 @@ "eslint-plugin-import": "^2.20.2", "eslint-plugin-jest": "^23.8.2", "eslint-plugin-prettier": "^3.1.3", - "jest": "^25.4.0", + "jest": "^29.7.0", "prettier": "^2.0.5", "rollup": "^2.7.3", "rollup-plugin-preserve-shebangs": "^0.2.0", diff --git a/src/convert.js b/src/convert.js index c4f3368..f8612c3 100644 --- a/src/convert.js +++ b/src/convert.js @@ -18,11 +18,16 @@ import { bold, italic, underline, + strikethrough, + superscript, + subscript, + hyperlink, divider, itemize, enumerate, item, image, + escapeLatexSpecialChars } from './templates'; const pipeline = promisify(pipelineSync); @@ -33,6 +38,9 @@ function analyzeForPackageImports(HTMLText) { if (HTMLText.includes('\\cfrac')) pkgs.push('amsmath'); if (HTMLText.includes('')) pkgs.push('ulem'); + if (HTMLText.includes('')) pkgs.push('hyperref'); + if (HTMLText.includes('')) pkgs.push('listings'); return pkgs; } @@ -84,12 +92,17 @@ async function convertImage( }); } + function convertPlainText(value, opts) { const breakReplacement = opts.ignoreBreaks ? 
'' : '\n\n'; const cleanText = value .replace(/(\n|\r)/g, breakReplacement) // Standardize line breaks or remove them .replace(/\t/g, '') // Remove tabs - .replace(/(?|"|\|)/g, '\\textbackslash{}') + .replace(/(\\)([%&#~<>\|])|([%&#~<>\|])/g, escapeLatexSpecialChars); + // Ideally, we would check for all special characters, e.g., /(\\)([%&_$#{}~^<>|"])|([%&_$#{}~^<>|"])/g + // However, we are currently allowing equations to be written in the HTML file. + const decodedText = decodeHTML(cleanText); return opts.preferDollarInlineMath ? decodedText.replace(/\\\(|\\\)/g, '$') : decodedText; @@ -103,17 +116,25 @@ async function convertRichTextSingle(n, opts) { case 'strong': return convertRichText(n, opts).then((t) => bold(t)); case 'i': + case 'em': return convertRichText(n, opts).then((t) => italic(t)); case 'u': return convertRichText(n, opts).then((t) => underline(t)); + case 's': + return convertRichText(n, opts).then((t) => strikethrough(t)); + case 'sub': + return convertRichText(n, opts).then((t) => subscript(t)); + case 'sup': + return convertRichText(n, opts).then((t) => superscript(t)); case 'br': return opts.ignoreBreaks ? 
' ' : '\n\n'; - case 'span': - return convertRichText(n, opts); + case 'a': + return convertRichText(n, opts).then((t) => hyperlink(t, n.attrs.find(({ name }) => name === 'href').value)); case '#text': return convertPlainText(n.value, opts); default: - return ''; + // we allow unknown tags to pass through + return convertRichText(n, opts); } } @@ -184,18 +205,21 @@ export async function convert( 'h1', 'h2', 'h3', + 'h4', + 'h5', + 'h6', 'ul', 'ol', 'img', 'hr', 'div', - 'section', 'body', 'html', 'header', 'footer', - 'aside', 'p', + 'table', + 'code' ]; const doc = []; const opts = { @@ -235,6 +259,9 @@ export async function convert( case 'h1': case 'h2': case 'h3': + case 'h4': + case 'h5': + case 'h6': doc.push(convertHeading(n, opts)); break; case 'ul': @@ -284,6 +311,21 @@ export async function convert( }), ); break; + case 'table': + if (n.childNodes.length === 0) + break; + if (n.childNodes[0].nodeName == 'tbody') + doc.push(convertTable(n.childNodes[0], opts)); + else + doc.push(convertTable(n, opts)); + break; + case 'code': + doc.push( + convertRichText(n, opts).then((t) => { + const trimmed = t.trim(); + return '\\begin{lstlisting}\n' + trimmed + '\n\\end{lstlisting}'; + }), + ); default: } }); @@ -316,3 +358,20 @@ export async function convertFile(filepath, { outputFilepath = filepath, ...opti await exportFile(processed, outputFilepath, dirname(filepath)); } + +async function convertTable(node, opts) { + const rows = Array.from(node.childNodes).filter(n => n.nodeName === 'tr'); + const processedRows = await Promise.all(rows.map(row => convertTableRow(row, opts))); + return '\\begin{tabular}{|' + 'c|'.repeat(processedRows[0].split('&').length) + '}\n' + + '\t\\hline\n\t' + processedRows.join('\t\\hline\n\t') + '\t\\hline\n\t' + '\\end{tabular}'; +} + +async function processTableCells(cells, opts) { + return Promise.all(cells.map(cell => convertRichText(cell, opts))); +} + +async function convertTableRow(row, opts) { + const cells = 
Array.from(row.childNodes).filter(n => n.nodeName === 'td' || n.nodeName === 'th'); + const processedCells = await processTableCells(cells, opts); + return processedCells.join(' & ') + ' \\\\\n'; // LaTeX column separator & line end +} \ No newline at end of file diff --git a/src/templates.js b/src/templates.js index e0e4030..b2cb011 100644 --- a/src/templates.js +++ b/src/templates.js @@ -13,6 +13,11 @@ export const subsubsection = (text) => `\\subsubsection*{${text}}`; export const bold = (text) => `\\textbf{${text}}`; export const italic = (text) => `\\textit{${text}}`; export const underline = (text) => `\\underline{${text}}`; +export const strikethrough = (text) => `\\sout{${text}}`; +export const superscript = (text) => `$^{${text}}$`; +export const subscript = (text) => `$_{${text}}$`; + +export const hyperlink = (text, url) => `\\href{${url}}{${text}}`; export const divider = nls('\\hrule'); @@ -54,3 +59,24 @@ export function beginDocument({ title, includeDate = false, author } = {}) { export const endDocument = nlp('\\end{document}'); export const docClass = (className) => `\\documentclass{${className}}`; + +export const escapeLatexSpecialChars = (match, p1) => { + if (p1) return match; + + const latexSpecialCharsMap = { + '\\': '\\textbackslash{}', + '{': '\\{', + '}': '\\}', + '%': '\\%', + '$': '\\$', + '&': '\\&', + '#': '\\#', + '^': '\\^{}', + '_': '\\_', + '~': '\\textasciitilde{}', + '%': '\\%', + '|': '\\textbar{}', +}; + + return latexSpecialCharsMap[match] || match; +}; diff --git a/test/unit/convert.js b/test/unit/convert.js deleted file mode 100644 index 755c3cc..0000000 --- a/test/unit/convert.js +++ /dev/null @@ -1,479 +0,0 @@ -import { directory } from 'tempy'; -import { pathExists, remove, readFile } from 'fs-extra'; -import { resolve } from 'path'; -import ShortId from 'shortid'; -import { convertText, exportFile, convertFile } from '../../src/convert'; - -describe('exportFile', () => { - let dir; - - beforeEach(() => { - dir = 
directory(); - }); - - afterEach(async () => { - await remove(dir); - }); - - it('should export latex file', async () => { - await exportFile('testing', 'test', dir); - - const exists = await pathExists(resolve(dir, 'test.tex')); - - expect(exists).toBeTruthy(); - }); -}); - -describe('convertText', () => { - describe('Document wrapper', () => { - it('should insert the basic document wrapper and default document class of article', async () => { - const html = ``; - const tex = await convertText(html, { includeDocumentWrapper: true }); - - expect(tex).toBe('\\documentclass{article}\n\n\\begin{document}\n\n\n\\end{document}'); - }); - - it('should insert the basic document heading with author', async () => { - const html = ``; - const tex = await convertText(html, { includeDocumentWrapper: true, author: 'Takashi' }); - - expect(tex).toBe( - '\\documentclass{article}\n\n\\author{Takashi}\n\n\\begin{document}\n\n\n\\end{document}', - ); - }); - - it('should insert the basic document heading with title', async () => { - const html = ``; - const tex = await convertText(html, { - includeDocumentWrapper: true, - title: 'Altered Carbon', - }); - - expect(tex).toBe( - '\\documentclass{article}\n\n\\title{Altered Carbon}\n\n\\begin{document}\n\n\\maketitle\n\n\n\\end{document}', - ); - }); - - it('should insert the basic document heading with date', async () => { - const html = ``; - const tex = await convertText(html, { includeDocumentWrapper: true, includeDate: true }); - - expect(tex).toBe( - '\\documentclass{article}\n\n\\date{\\today}\n\n\\begin{document}\n\n\n\\end{document}', - ); - }); - }); - - describe('Converting embedded sectioning tags', () => { - it('should properly convert section tags', async () => { - const html = `
Test
`; - const tex = await convertText(html); - - expect(tex).toBe('Test'); - }); - - it('should properly convert aside tags', async () => { - const html = ``; - const tex = await convertText(html); - - expect(tex).toBe('Test'); - }); - - it('should properly convert div tags', async () => { - const html = `
Test
`; - const tex = await convertText(html); - - expect(tex).toBe('Test'); - }); - - it('should properly convert html tags', async () => { - const html = `Test`; - const tex = await convertText(html); - - expect(tex).toBe('Test'); - }); - - it('should properly convert header tags', async () => { - const html = `
Test
`; - const tex = await convertText(html); - - expect(tex).toBe('Test'); - }); - - it('should properly convert footer tags', async () => { - const html = `
Test
`; - const tex = await convertText(html); - - expect(tex).toBe('Test'); - }); - }); - - describe('Converting general text', () => { - it('should convert simple text tag with bold `b` styling', async () => { - const html = `

Styled Text

`; - const tex = await convertText(html); - - expect(tex).toBe('Styled \\textbf{Text}'); - }); - - it('should convert simple text tag with bold `strong` styling', async () => { - const html = `

Styled Text

`; - const tex = await convertText(html); - - expect(tex).toBe('Styled \\textbf{Text}'); - }); - - it('should convert simple text tag with italics styling', async () => { - const html = `

Styled Text

`; - const tex = await convertText(html); - - expect(tex).toBe('Styled \\textit{Text}'); - }); - - it('should convert simple text tag with underline styling', async () => { - const html = `

Styled Text

`; - const tex = await convertText(html); - - expect(tex).toBe('Styled \\underline{Text}'); - }); - - it('should convert text tag with span nesting', async () => { - const html = `

Styled Text

`; - const tex = await convertText(html); - - expect(tex).toBe('Styled Text'); - }); - - it('should ignore `\t`', async () => { - const html = `

Styled\tText

`; - const tex = await convertText(html); - - expect(tex).toBe('StyledText'); - }); - - it('should escape `%`', async () => { - const html = `

Styled%Text

`; - const tex = await convertText(html); - - expect(tex).toBe('Styled\\%Text'); - }); - - it('should not escape `%` if its already escaped', async () => { - const html = `

Styled\\%Text

`; - const tex = await convertText(html); - - expect(tex).toBe('Styled\\%Text'); - }); - }); - - describe('Converting text with different types of breaks', () => { - it('should convert simple `p` tag text with `br` tags. These will be ignored by default', async () => { - const html = `

Styled
Text

`; - const tex = await convertText(html); - - expect(tex).toBe('Styled Text'); - }); - - it('should convert simple `p` tag text with `br` tags and the ignoreBreaks argument set to false', async () => { - const html = `

Styled
Text

`; - const tex = await convertText(html, { ignoreBreaks: false }); - - expect(tex).toBe('Styled\n\nText'); - }); - - it('should convert simple text with `\n` and the ignoreBreaks argument set to false', async () => { - const html = `

Styled\nText

`; - const tex = await convertText(html, { ignoreBreaks: false }); - - expect(tex).toBe('Styled\n\nText'); - }); - - it('should convert simple text with `\r` and the ignoreBreaks argument set to false', async () => { - const html = `

Styled\rText

`; - const tex = await convertText(html, { ignoreBreaks: false }); - - expect(tex).toBe('Styled\n\nText'); - }); - }); - - describe('Unwrapped content', () => { - it('should convert simple text with `br` tags and the ignoreBreaks argument set to false', async () => { - const html = `Styled
Text`; - const tex = await convertText(html, { ignoreBreaks: false }); - - expect(tex).toBe('Styled\n\nText'); - }); - - it('should convert complex text with `br` tags and the ignoreBreaks argument set to false', async () => { - const html = `Three concentric metal shells
More text here.

Inner p tag

`; - const tex = await convertText(html, { ignoreBreaks: false }); - - expect(tex).toBe('Three concentric metal shells\n\nMore text here.\n\nInner p tag'); - }); - }); - - describe('Converting text with equations', () => { - it('should convert eq wrappers p tags with only an eq to use the \\[ wrapper instead of \\(', async () => { - const html = `

\\(x = 5\\Omega\\)

`; - const tex = await convertText(html); - - expect(tex).toBe('\\[x = 5\\Omega\\]'); - }); - - it('should convert p tags with only an eq to use the \\[ wrapper instead of $', async () => { - const html = `

$x = 5\\Omega$

`; - const tex = await convertText(html); - - expect(tex).toBe('\\[x = 5\\Omega\\]'); - }); - - it('should not convert p tags with only an eq to use the \\[ wrapper instead of \\( if skipWrappingEquations is true', async () => { - const html = `

\\(x = 5\\Omega\\)

`; - const tex = await convertText(html, { skipWrappingEquations: true }); - - expect(tex).toBe('\\(x = 5\\Omega\\)'); - }); - - it('should not convert p tags with only an eq to use the \\[ wrapper instead of $ if skipWrappingEquations is true', async () => { - const html = `

$x = 5\\Omega$

`; - const tex = await convertText(html, { skipWrappingEquations: true }); - - expect(tex).toBe('$x = 5\\Omega$'); - }); - - it('should not modify eq wrappers in p tags with an eq and other content', async () => { - const html = `

Some content $x = 5\\Omega$

`; - const tex = await convertText(html); - - expect(tex).toBe('Some content $x = 5\\Omega$'); - }); - - it('should prefer $ eq wrappers if configuration is given', async () => { - const html = `

Some content \\(x = 5\\Omega\\)

`; - const tex = await convertText(html, { preferDollarInlineMath: true }); - - expect(tex).toBe('Some content $x = 5\\Omega$'); - }); - - it('should handle eqs deep within text without tag wrapping', async () => { - const html = - 'This is some plain text \\(A,{\\rm{ }}B\\) and \\(C\\) with random equations \\(a,{\\rm{ }}b\\) and \\(c\\) \\((a < b < c)\\)'; - const tex = await convertText(html, { preferDollarInlineMath: true }); - - expect(tex).toBe( - 'This is some plain text $A,{\\rm{ }}B$ and $C$ with random equations $a,{\\rm{ }}b$ and $c$ $(a < b < c)$', - ); - }); - }); - - describe('Converting H tags', () => { - it('should convert simple h tag without special chars', async () => { - const html = `

Heading

`; - const tex = await convertText(html); - - expect(tex).toBe('\\section*{\\centering{Heading}}'); - }); - - it('should convert simple h2 tag without special chars', async () => { - const html = `

Heading

`; - const tex = await convertText(html); - - expect(tex).toBe('\\subsection*{Heading}'); - }); - - it('should convert simple h3 tag without special chars', async () => { - const html = `

Heading

`; - const tex = await convertText(html); - - expect(tex).toBe('\\subsubsection*{Heading}'); - }); - - it('should convert simple h tag with special chars', async () => { - const html = `

Heading's

`; - const tex = await convertText(html); - - expect(tex).toBe("\\section*{\\centering{Heading's}}"); - }); - - it('should convert h tag with embedded css', async () => { - const html = `

Heading's

`; - const tex = await convertText(html); - - expect(tex).toBe("\\section*{\\centering{Heading's}}"); - }); - - it('should convert h tag with embedded css and special characters', async () => { - const html = - '

Newton's Laws of Motion

'; - const tex = await convertText(html); - - expect(tex).toBe("\\section*{\\centering{\\underline{\\textbf{Newton's Laws of Motion}}}}"); - }); - }); - - describe('Converting divider tags', () => { - it('should convert simple divider tag', async () => { - const html = `

Text


More Text

`; - const tex = await convertText(html); - - expect(tex).toBe('Text\n\n\\hrule\n\n\nMore Text'); - }); - }); - - describe('Converting img tags', () => { - it('should convert simple img tag', async () => { - const html = ``; - const tex = await convertText(html, { autoGenImageNames: false }); - - expect(tex).toBe('\\begin{center}\n\t\\includegraphics{images/image.png}\n\\end{center}'); - }); - - it('should convert wrapped img tag', async () => { - const spy = jest.spyOn(ShortId, 'generate'); - spy.mockImplementation(() => 'image2'); - - const html = `

`; - const tex = await convertText(html); - - expect(tex).toBe('\\begin{center}\n\t\\includegraphics{images/image2.png}\n\\end{center}'); - - spy.mockClear(); - }); - - it('should default to a jpg extension when converting img tag with a image url without a extension', async () => { - const spy = jest.spyOn(ShortId, 'generate'); - spy.mockImplementation(() => 'image2'); - - const html = `

`; - const tex = await convertText(html); - - expect(tex).toBe('\\begin{center}\n\t\\includegraphics{images/image2.jpg}\n\\end{center}'); - - spy.mockClear(); - }); - - it('should add width restrictions when given', async () => { - const html = ``; - const tex = await convertText(html, { autoGenImageNames: false, imageWidth: '2cm' }); - - expect(tex).toBe( - '\\begin{center}\n\t\\includegraphics[width=2cm]{images/image.png}\n\\end{center}', - ); - }); - - it('should add height restrictions when given', async () => { - const html = ``; - const tex = await convertText(html, { autoGenImageNames: false, imageHeight: '2cm' }); - - expect(tex).toBe( - '\\begin{center}\n\t\\includegraphics[height=2cm]{images/image.png}\n\\end{center}', - ); - }); - - it('should keep aspect ratio when given and width or height are restricted', async () => { - const html = ``; - const tex = await convertText(html, { - autoGenImageNames: false, - imageHeight: '2cm', - keepImageAspectRatio: true, - }); - - expect(tex).toBe( - '\\begin{center}\n\t\\includegraphics[height=2cm,keepaspectratio]{images/image.png}\n\\end{center}', - ); - }); - - it('should ignore aspect ratio when given if width or height are not restricted', async () => { - const html = ``; - const tex = await convertText(html, { autoGenImageNames: false, keepImageAspectRatio: true }); - - expect(tex).toBe('\\begin{center}\n\t\\includegraphics{images/image.png}\n\\end{center}'); - }); - - it('should not center the image', async () => { - const html = ``; - const tex = await convertText(html, { autoGenImageNames: false, centerImages: false }); - - expect(tex).toBe('\\includegraphics{images/image.png}'); - }); - }); - - describe('Converting list tags', () => { - it('should convert simple ul list tag', async () => { - const html = ``; - const tex = await convertText(html); - - expect(tex).toBe('\\begin{itemize}\n\t\\item Angle reaction\n\\end{itemize}'); - }); - - it('should convert simple ol list tag', async () => { - const html = `
  1. Angle reaction
`; - const tex = await convertText(html); - - expect(tex).toBe('\\begin{enumerate}\n\t\\item Angle reaction\n\\end{enumerate}'); - }); - }); - - describe('Converting with debug flag', () => { - it('should display errors when converting img tag with an inaccessible source url with the debug flag', async () => { - const spy = jest.spyOn(console, 'debug').mockImplementation(); - const html = ``; - - await convertText(html, { autoGenImageNames: false, debug: true }); - - expect(spy).toBeCalledTimes(2); - - spy.mockRestore(); - }); - - it('should not display errors when converting img tag with an inaccessible source url without the debug flag', async () => { - const spy = jest.spyOn(console, 'debug').mockImplementation(); - const html = ``; - - await convertText(html, { autoGenImageNames: false }); - - expect(spy).toBeCalledTimes(0); - - spy.mockRestore(); - }); - }); -}); - -describe('convertFile', () => { - describe('Converting mixed tags', () => { - it('should convert text with a mixture of nested tags', async () => { - await convertFile(resolve(__dirname, '../test-cases/2/index.html'), { - includeDocumentWrapper: false, - }); - - const tex = await readFile(resolve(__dirname, '../test-cases/2/index.html.tex'), 'utf-8'); - const text = [ - "\\section*{\\centering{\\underline{\\textbf{Newton's Laws of Motion}}}}", - '', - '\\subsection*{\\textbf{Concept of Forces}}', - '', - 'Some types of forces may be (i) Contact forces, (ii) Non-contact forces \\textbf{Contact forces} involve physical contact between two objects.', - ]; - - expect(tex).toBe(text.join('\n')); - - await remove(resolve(__dirname, '../test-cases/2/index.html.tex')); - }); - }); - - it('should convert text without tag wrapper while ignoring break tags', async () => { - const spy = jest.spyOn(ShortId, 'generate'); - spy.mockImplementation(() => 'image2'); - - await convertFile(resolve(__dirname, '../test-cases/3/index.html'), { ignoreBreaks: false }); - - const tex = await readFile(resolve(__dirname, 
'../test-cases/3/index.html.tex'), 'utf-8'); - const ref = await readFile(resolve(__dirname, '../test-cases/3/output.tex'), 'utf-8'); - - expect(tex).toBe(ref); - - await remove(resolve(__dirname, '../test-cases/3/index.html.tex')); - - spy.mockClear(); - }); -}); diff --git a/test/unit/document.test.js b/test/unit/document.test.js new file mode 100644 index 0000000..4e4d820 --- /dev/null +++ b/test/unit/document.test.js @@ -0,0 +1,40 @@ +import { convertText, exportFile, convertFile } from '../../src/convert'; + +describe('Document wrapper', () => { + it('should insert the basic document wrapper and default document class of article', async () => { + const html = ``; + const tex = await convertText(html, { includeDocumentWrapper: true }); + + expect(tex).toBe('\\documentclass{article}\n\n\\begin{document}\n\n\n\\end{document}'); + }); + + it('should insert the basic document heading with author', async () => { + const html = ``; + const tex = await convertText(html, { includeDocumentWrapper: true, author: 'Takashi' }); + + expect(tex).toBe( + '\\documentclass{article}\n\n\\author{Takashi}\n\n\\begin{document}\n\n\n\\end{document}', + ); + }); + + it('should insert the basic document heading with title', async () => { + const html = ``; + const tex = await convertText(html, { + includeDocumentWrapper: true, + title: 'Altered Carbon', + }); + + expect(tex).toBe( + '\\documentclass{article}\n\n\\title{Altered Carbon}\n\n\\begin{document}\n\n\\maketitle\n\n\n\\end{document}', + ); + }); + + it('should insert the basic document heading with date', async () => { + const html = ``; + const tex = await convertText(html, { includeDocumentWrapper: true, includeDate: true }); + + expect(tex).toBe( + '\\documentclass{article}\n\n\\date{\\today}\n\n\\begin{document}\n\n\n\\end{document}', + ); + }); + }); \ No newline at end of file diff --git a/test/unit/escaping.test.js b/test/unit/escaping.test.js new file mode 100644 index 0000000..64b4cdc --- /dev/null +++ 
b/test/unit/escaping.test.js @@ -0,0 +1,85 @@ +import { convertText, exportFile, convertFile } from '../../src/convert'; + +describe('Escapes illegal characters', () => { + + it('should ignore `\t`', async () => { + const html = `

Styled\tText

`; + const tex = await convertText(html); + + expect(tex).toBe('StyledText'); + }); + + it('should escape `%`', async () => { + const html = `

Styled%Text

`; + const tex = await convertText(html); + + expect(tex).toBe('Styled\\%Text'); + }); + + it('should escape `&`', async () => { + const html = `

Styled&Text

`; + const tex = await convertText(html); + + expect(tex).toBe('Styled\\&Text'); + }); + + it('should escape `#`, `~`', async () => { + const html = `

Styled#Text ~Text

`; + const tex = await convertText(html); + + expect(tex).toBe('Styled\\#Text \\textasciitilde{}Text'); + }); + + // this was removed because it would fail since we are allowing inline math in HTML. + // it('should escape `$`, `#`, `_`, `{`, `}`, `~`, `^`', async () => { + // const html = `

Styled$Text #Text _Text {Text} ~Text ^Text

`; + // const tex = await convertText(html); + + // expect(tex).toBe('Styled\\$Text \\#Text \\_Text \\{Text\\} \\~Text \\^Text'); + // }); + + it('should escape `|`', async () => { + const html = `

Styled|Text

`; + const tex = await convertText(html); + + expect(tex).toBe('Styled\\textbar{}Text'); + }); + + // Again, this was removed because we are allowing inline math in HTML. + // it('should escape `\\`', async () => { + // const html = `

Styled\\Text

`; + // const tex = await convertText(html); + + // expect(tex).toBe('Styled\\textbackslash{}Text'); + // }); + + it('should not escape a char e.g. `%` if its already escaped', async () => { + const html = `

Styled\\%Text

`; + const tex = await convertText(html); + + expect(tex).toBe('Styled\\%Text'); + }); + + it('should convert simple tags with special chars', async () => { + const html = `

Heading's

Section's

Paragraph's

`; + const tex = await convertText(html); + + expect(tex).toBe("\\section*{\\centering{Heading's}}\n\nSection's\n\nParagraph's"); + }); + + it('should convert simple tags with embedded css', async () => { + const html = `

Heading's

`; + const tex = await convertText(html); + + expect(tex).toBe("\\section*{\\centering{Heading's}}"); + }); + + it('should convert simple tags with ignoring any attributes', async () => { + const html = `

Heading's

`; + const tex = await convertText(html); + + expect(tex).toBe("\\section*{\\centering{Heading's}}"); + }); + + +}); \ No newline at end of file diff --git a/test/unit/files.test.js b/test/unit/files.test.js new file mode 100644 index 0000000..f8a1f35 --- /dev/null +++ b/test/unit/files.test.js @@ -0,0 +1,65 @@ +import { directory } from 'tempy'; +import { pathExists, remove, readFile } from 'fs-extra'; +import { resolve } from 'path'; +import ShortId from 'shortid'; +import { convertText, exportFile, convertFile } from '../../src/convert'; + +describe('exportFile', () => { + let dir; + + beforeEach(() => { + dir = directory(); + }); + + afterEach(async () => { + await remove(dir); + }); + + it('should export latex file', async () => { + await exportFile('testing', 'test', dir); + + const exists = await pathExists(resolve(dir, 'test.tex')); + + expect(exists).toBeTruthy(); + }); +}); + + +describe('convertFile', () => { + describe('Converting mixed tags', () => { + it('should convert text with a mixture of nested tags', async () => { + await convertFile(resolve(__dirname, '../test-cases/2/index.html'), { + includeDocumentWrapper: false, + }); + + const tex = await readFile(resolve(__dirname, '../test-cases/2/index.html.tex'), 'utf-8'); + const text = [ + "\\section*{\\centering{\\underline{\\textbf{Newton's Laws of Motion}}}}", + '', + '\\subsection*{\\textbf{Concept of Forces}}', + '', + 'Some types of forces may be (i) Contact forces, (ii) Non-contact forces \\textbf{Contact forces} involve physical contact between two objects.', + ]; + + expect(tex).toBe(text.join('\n')); + + await remove(resolve(__dirname, '../test-cases/2/index.html.tex')); + }); + }); + + it('should convert text without tag wrapper while ignoring break tags', async () => { + const spy = jest.spyOn(ShortId, 'generate'); + spy.mockImplementation(() => 'image2'); + + await convertFile(resolve(__dirname, '../test-cases/3/index.html'), { ignoreBreaks: false }); + + const tex = await 
readFile(resolve(__dirname, '../test-cases/3/index.html.tex'), 'utf-8'); + const ref = await readFile(resolve(__dirname, '../test-cases/3/output.tex'), 'utf-8'); + + expect(tex).toBe(ref); + + await remove(resolve(__dirname, '../test-cases/3/index.html.tex')); + + spy.mockClear(); + }); + }); \ No newline at end of file diff --git a/test/unit/images.test.js b/test/unit/images.test.js new file mode 100644 index 0000000..23e049d --- /dev/null +++ b/test/unit/images.test.js @@ -0,0 +1,104 @@ +import ShortId from 'shortid'; +import { convertText, exportFile, convertFile } from '../../src/convert'; + +describe('Converting img tags', () => { + it('should convert simple img tag', async () => { + const html = ``; + const tex = await convertText(html, { autoGenImageNames: false }); + + expect(tex).toBe('\\begin{center}\n\t\\includegraphics{images/image.png}\n\\end{center}'); + }); + + it('should convert wrapped img tag', async () => { + const spy = jest.spyOn(ShortId, 'generate'); + spy.mockImplementation(() => 'image2'); + + const html = `

`; + const tex = await convertText(html); + + expect(tex).toBe('\\begin{center}\n\t\\includegraphics{images/image2.png}\n\\end{center}'); + + spy.mockClear(); + }); + + it('should default to a jpg extension when converting img tag with a image url without a extension', async () => { + const spy = jest.spyOn(ShortId, 'generate'); + spy.mockImplementation(() => 'image2'); + + const html = `

`; + const tex = await convertText(html); + + expect(tex).toBe('\\begin{center}\n\t\\includegraphics{images/image2.jpg}\n\\end{center}'); + + spy.mockClear(); + }); + + it('should add width restrictions when given', async () => { + const html = ``; + const tex = await convertText(html, { autoGenImageNames: false, imageWidth: '2cm' }); + + expect(tex).toBe( + '\\begin{center}\n\t\\includegraphics[width=2cm]{images/image.png}\n\\end{center}', + ); + }); + + it('should add height restrictions when given', async () => { + const html = ``; + const tex = await convertText(html, { autoGenImageNames: false, imageHeight: '2cm' }); + + expect(tex).toBe( + '\\begin{center}\n\t\\includegraphics[height=2cm]{images/image.png}\n\\end{center}', + ); + }); + + it('should keep aspect ratio when given and width or height are restricted', async () => { + const html = ``; + const tex = await convertText(html, { + autoGenImageNames: false, + imageHeight: '2cm', + keepImageAspectRatio: true, + }); + + expect(tex).toBe( + '\\begin{center}\n\t\\includegraphics[height=2cm,keepaspectratio]{images/image.png}\n\\end{center}', + ); + }); + + it('should ignore aspect ratio when given if width or height are not restricted', async () => { + const html = ``; + const tex = await convertText(html, { autoGenImageNames: false, keepImageAspectRatio: true }); + + expect(tex).toBe('\\begin{center}\n\t\\includegraphics{images/image.png}\n\\end{center}'); + }); + + it('should not center the image', async () => { + const html = ``; + const tex = await convertText(html, { autoGenImageNames: false, centerImages: false }); + + expect(tex).toBe('\\includegraphics{images/image.png}'); + }); + }); + +describe('Converting with debug flag', () => { + it('should display errors when converting img tag with an inaccessible source url with the debug flag', async () => { + const spy = jest.spyOn(console, 'debug').mockImplementation(); + const html = ``; + + await convertText(html, { autoGenImageNames: false, debug: true 
}); + + expect(spy).toBeCalledTimes(2); + + spy.mockRestore(); + }); + + it('should not display errors when converting img tag with an inaccessible source url without the debug flag', async () => { + const spy = jest.spyOn(console, 'debug').mockImplementation(); + const html = ``; + + await convertText(html, { autoGenImageNames: false }); + + expect(spy).toBeCalledTimes(0); + + spy.mockRestore(); + }); +}); \ No newline at end of file diff --git a/test/unit/simpleFormatting.test.js b/test/unit/simpleFormatting.test.js new file mode 100644 index 0000000..ca10f52 --- /dev/null +++ b/test/unit/simpleFormatting.test.js @@ -0,0 +1,119 @@ +import { convertText, exportFile, convertFile } from '../../src/convert'; + +describe('Converting Simple Formatting Tags', () => { + + it('should convert a bold tag `b` styling', async () => { + const html = `

Styled Text

`; + const tex = await convertText(html); + + expect(tex).toBe('Styled \\textbf{Text}'); + }); + + it('should convert a bold tag `strong` styling', async () => { + const html = `

Styled Text

`; + const tex = await convertText(html); + + expect(tex).toBe('Styled \\textbf{Text}'); + }); + + it('should convert simple text tag with italics styling', async () => { + const html = `

Styled Text

`; + const tex = await convertText(html); + + const html2 = `

Styled Text

`; + const tex2 = await convertText(html2); + + expect(tex).toBe('Styled \\textit{Text}'); + expect(tex2).toBe('Styled \\textit{Text}'); + }); + + it('should convert simple text tag with underline styling', async () => { + const html = `

Styled Text

`; + const tex = await convertText(html); + + expect(tex).toBe('Styled \\underline{Text}'); + }); + + it('should convert simple text tag with strikethrough styling', async () => { + const html = `

Styled Text

`; + const tex = await convertText(html); + + expect(tex).toBe('Styled \\sout{Text}'); + }); + + it('should allow nesting', async () => { + const html = `

Styled Text

`; + const tex = await convertText(html); + + expect(tex).toBe('Styled \\textbf{\\textit{Text}}'); + }); + + it('should convert superscripts and subscripts', async () => { + const html = `

water squared can be writtena as: H2O2

`; + const tex = await convertText(html); + + expect(tex).toBe('water squared can be writtena as: H$_{2}$O$^{2}$'); + }); + +}); + +describe('Converting Equations', () => { + it('should convert superscripts and subscripts', async () => { + const html = `

water squared can be writtena as: H2O2

`; + const tex = await convertText(html); + + expect(tex).toBe('water squared can be writtena as: H$_{2}$O$^{2}$'); + }); + + it('should convert eq wrappers p tags with only an eq to use the \\[ wrapper instead of \\(', async () => { + const html = `

\\(x = 5\\Omega\\)

`; + const tex = await convertText(html); + + expect(tex).toBe('\\[x = 5\\Omega\\]'); + }); + + it('should convert p tags with only an eq to use the \\[ wrapper instead of $', async () => { + const html = `

$x = 5\\Omega$

`; + const tex = await convertText(html); + + expect(tex).toBe('\\[x = 5\\Omega\\]'); + }); + + it('should not convert p tags with only an eq to use the \\[ wrapper instead of \\( if skipWrappingEquations is true', async () => { + const html = `

\\(x = 5\\Omega\\)

`; + const tex = await convertText(html, { skipWrappingEquations: true }); + + expect(tex).toBe('\\(x = 5\\Omega\\)'); + }); + + it('should not convert p tags with only an eq to use the \\[ wrapper instead of $ if skipWrappingEquations is true', async () => { + const html = `

$x = 5\\Omega$

`; + const tex = await convertText(html, { skipWrappingEquations: true }); + + expect(tex).toBe('$x = 5\\Omega$'); + }); + + it('should not modify eq wrappers in p tags with an eq and other content', async () => { + const html = `

Some content $x = 5\\Omega$

`; + const tex = await convertText(html); + + expect(tex).toBe('Some content $x = 5\\Omega$'); + }); + + it('should prefer $ eq wrappers if configuration is given', async () => { + const html = `

Some content \\(x = 5\\Omega\\)

`; + const tex = await convertText(html, { preferDollarInlineMath: true }); + + expect(tex).toBe('Some content $x = 5\\Omega$'); + }); + + it('should handle eqs deep within text without tag wrapping', async () => { + const html = + 'This is some plain text \\(A,{\\rm{ }}B\\) and \\(C\\) with random equations \\(a,{\\rm{ }}b\\) and \\(c\\) \\((a < b < c)\\)'; + const tex = await convertText(html, { preferDollarInlineMath: true }); + + expect(tex).toBe( + 'This is some plain text $A,{\\rm{ }}B$ and $C$ with random equations $a,{\\rm{ }}b$ and $c$ $(a < b < c)$', + ) + }); +}); \ No newline at end of file diff --git a/test/unit/tags.test.js b/test/unit/tags.test.js new file mode 100644 index 0000000..7ccfec9 --- /dev/null +++ b/test/unit/tags.test.js @@ -0,0 +1,174 @@ +import { convertText, exportFile, convertFile } from '../../src/convert'; + +describe('Converting Simple Tags', () => { + + it('should properly convert html tags', async () => { + const html = `Test`; + const tex = await convertText(html); + + expect(tex).toBe('Test'); + }); + + it('should convert heading tags', async () => { + // h4, h5, and h6 are converted to subsubsections. + const html = `

Heading 1

Heading 2

Heading 3

Heading 4

Heading 5
Heading 6
`; + const tex = await convertText(html); + + expect(tex).toBe('\\section*{\\centering{Heading 1}}\n\n\\subsection*{Heading 2}\n\n\\subsubsection*{Heading 3}\n\n\\subsubsection*{Heading 4}\n\n\\subsubsection*{Heading 5}\n\n\\subsubsection*{Heading 6}'); + }); + + it('should convert unordered lists', async () => { + const html = ``; + const tex = await convertText(html); + + expect(tex).toBe('\\begin{itemize}\n\t\\item Item 1\n\t\\item Item 2\n\\end{itemize}'); + }); + + it('should convert ordered lists', async () => { + const html = `
  1. Item 1
  2. Item 2
`; + const tex = await convertText(html); + + expect(tex).toBe('\\begin{enumerate}\n\t\\item Item 1\n\t\\item Item 2\n\\end{enumerate}'); + }); + + it('should convert a table', async () => { + const html = `
Header 1Header 2
Item 1Item 2
`; + const tex = await convertText(html); + + expect(tex).toBe('\\begin{tabular}{|c|c|}\n\t\\hline\n\tHeader 1 & Header 2 \\\\\n\t\\hline\n\tItem 1 & Item 2 \\\\\n\t\\hline\n\t\\end{tabular}'); + }); + + it('should convert code tags', async () => { + const html = `console.log('Hello World!');`; + const tex = await convertText(html); + + expect(tex).toBe('\\begin{lstlisting}\nconsole.log(\'Hello World!\');\n\\end{lstlisting}'); + }); + + + it('should convert horizontal rule', async () => { + const html = `
`; + const tex = await convertText(html); + + expect(tex).toBe('\\hrule\n'); + }); + + it('should convert a link', async () => { + const html = `

visit Google

`; + const tex = await convertText(html); + + expect(tex).toBe('visit \\href{https://www.google.com}{Google}'); + }); + + it('should convert a div', async () => { + const html = `
`; + const tex = await convertText(html); + + expect(tex).toBe(''); + }); + + it('should convert a nested div', async () => { + const html = `
Hello World!
`; + const tex = await convertText(html); + + expect(tex).toBe('Hello World!'); + }); + + it('should convert text tag with span nesting', async () => { + const html = `

Styled Text

`; + const tex = await convertText(html); + + expect(tex).toBe('Styled Text'); + }); + + it('should convert simple text with `br` tags and the ignoreBreaks argument set to false', async () => { + const html = `Styled
Text`; + const tex = await convertText(html, { ignoreBreaks: false }); + + expect(tex).toBe('Styled\n\nText'); + }); + + it('should convert complex text with `br` tags and the ignoreBreaks argument set to false', async () => { + const html = `Three concentric metal shells
More text here.

Inner p tag

`; + const tex = await convertText(html, { ignoreBreaks: false }); + + expect(tex).toBe('Three concentric metal shells\n\nMore text here.\n\nInner p tag'); + }); + + it('should convert an unknown tag inside text', async () => { + const html = `

I was born on

`; + const tex = await convertText(html); + + expect(tex).toBe('I was born on 31 Dec'); + }); + + it('should convert an unknown tag', async () => { + const html = `Text`; + const tex = await convertText(html); + + expect(tex).toBe('Text'); + }); + + // additional tests (In original, logic is captured above): + // note: section, aside, main, footer, etc., need not be handled separately; they can be handled by the default case (unknown/ordinary tag). + it('should properly convert section tags', async () => { + const html = `
Test
`; + const tex = await convertText(html); + + expect(tex).toBe('Test'); + }); + + it('should properly convert aside tags', async () => { + const html = ``; + const tex = await convertText(html); + + expect(tex).toBe('Test'); + }); + + it('should properly convert aside tags', async () => { + const html = `
Test
`; + const tex = await convertText(html); + + expect(tex).toBe('Test'); + }); + + it('should properly convert main tags', async () => { + const html = `
Test
`; + const tex = await convertText(html); + + expect(tex).toBe('Test'); + }); + + +}); + +describe("Converting breaks", () => { + it('should convert simple `p` tag text with `br` tags. These will be ignored by default', async () => { + const html = `

Styled
Text

`; + const tex = await convertText(html); + + expect(tex).toBe('Styled Text'); + }); + + it('should convert simple `p` tag text with `br` tags and the ignoreBreaks argument set to false', async () => { + const html = `

Styled
Text

`; + const tex = await convertText(html, { ignoreBreaks: false }); + + expect(tex).toBe('Styled\n\nText'); + }); + + it('should convert simple text with `\n` and the ignoreBreaks argument set to false', async () => { + const html = `

Styled\nText

`; + const tex = await convertText(html, { ignoreBreaks: false }); + + expect(tex).toBe('Styled\n\nText'); + }); + + it('should convert simple text with `\r` and the ignoreBreaks argument set to false', async () => { + const html = `

Styled\rText

`; + const tex = await convertText(html, { ignoreBreaks: false }); + + expect(tex).toBe('Styled\n\nText'); + }); + +}); \ No newline at end of file