From bf3fe7a59d7dd31df75263ac438322991c26cbef Mon Sep 17 00:00:00 2001
From: Mohammed Sahl
Date: Thu, 23 Jul 2020 11:45:50 -0400
Subject: [PATCH] add skeleton for dfs

---
 src/codecs/HTML/index.ts | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/src/codecs/HTML/index.ts b/src/codecs/HTML/index.ts
index 43423d6..97523c7 100644
--- a/src/codecs/HTML/index.ts
+++ b/src/codecs/HTML/index.ts
@@ -5,11 +5,37 @@ export function parse_str(data: string): WJSDoc {
 	const dom: JSDOM = new JSDOM(data);
 	const doc: WJSDoc = {p:[]};
 	const para: WJSPara = {elts:[]};
-	para.elts.push({t: "s", v: dom.window.document.querySelector('body').textContent});
+
+	// Start from the html root element rather than document.childNodes[0]:
+	// the first child may be a doctype or comment node, which has no children.
+	const root = dom.window.document.documentElement;
+	dfs(root, para);
 	doc.p.push(para);
 	return doc;
 }
 
+/**
+ * Depth-first walk over the descendants of `element`, appending one string
+ * run ({t: "s"}) to `para` for every node whose nodeName is "P".
+ *
+ * @param element node whose descendants are visited (the node itself is not inspected)
+ * @param para paragraph that collects the runs; mutated in place
+ * @returns the same `para` instance that was passed in
+ */
+const dfs = (element: ChildNode , para: WJSPara): WJSPara => {
+	element.childNodes.forEach(child => {
+		switch (child.nodeName) {
+			case "P":
+				para.elts.push({t: "s", v: child.textContent});
+				break;
+			default: /*throw `DOCX table unsuported ${child.nodeName} element`*/
+				break;
+		}
+		dfs(child, para);
+	})
+	return para;
+}
+
 export function read(data: Buffer): WJSDoc {
 	return parse_str(data.toString());
 }
\ No newline at end of file