/**
 * Parse an HTML string into a WJSDoc.
 *
 * The markup is loaded into a JSDOM document and walked depth-first; the
 * text of every `<p>` element found is appended, in document order, as a
 * string element (`t: "s"`) of a single paragraph, which becomes the only
 * entry of the returned document's `p` array.
 *
 * @param data HTML source text.
 * @returns A document containing exactly one paragraph.
 */
export function parse_str(data: string): WJSDoc {
  const dom = new JSDOM(data);
  const para: WJSPara = { elts: [] };

  // Start from the root element rather than `childNodes[0]`: when the input
  // begins with a doctype or a comment, the first child of the document is
  // that node (which has no children), and the walk would find nothing.
  dfs(dom.window.document.documentElement, para);

  return { p: [para] };
}

/**
 * Depth-first walk over the descendants of `element`, collecting paragraph
 * text into `para`.
 *
 * Only nodes whose `nodeName` is `"P"` contribute an element; every other
 * node is merely searched for paragraphs further down the tree.
 *
 * @param element Node whose children are visited (the node itself is not inspected).
 * @param para Paragraph that receives the collected elements; mutated in place.
 * @returns The same `para` object that was passed in.
 */
const dfs = (element: ChildNode, para: WJSPara): WJSPara => {
  element.childNodes.forEach((child) => {
    if (child.nodeName === "P") {
      // `textContent` is typed `string | null`; fall back to an empty string
      // so the stored value is always a string.
      para.elts.push({ t: "s", v: child.textContent ?? "" });
      // `textContent` already includes the text of every descendant, so do
      // not descend further: a paragraph nested inside this one would
      // otherwise be emitted a second time.
      return;
    }
    // Other node types are not converted yet; keep looking for paragraphs
    // beneath them.
    dfs(child, para);
  });
  return para;
};

/**
 * Read an HTML document from a buffer.
 *
 * @param data Raw file contents; decoded with `Buffer#toString()` defaults.
 * @returns The parsed document, as produced by `parse_str`.
 */
export function read(data: Buffer): WJSDoc {
  return parse_str(data.toString());
}