diff --git a/src/codecs/HTML/index.ts b/src/codecs/HTML/index.ts
index 43423d6..97523c7 100644
--- a/src/codecs/HTML/index.ts
+++ b/src/codecs/HTML/index.ts
@@ -5,11 +5,26 @@ export function parse_str(data: string): WJSDoc {
const dom: JSDOM = new JSDOM(data);
const doc: WJSDoc = {p:[]};
const para: WJSPara = {elts:[]};
- para.elts.push({t: "s", v: dom.window.document.querySelector('body').textContent});
+
+ // Assuming the document's first child node is always the root element to traverse (e.g. <html>, not a doctype).
+ const root = dom.window.document.childNodes[0];
+ dfs(root, para);
doc.p.push(para);
return doc;
}
+/**
+ * Depth-first walk over the DOM subtree below `element`, appending one
+ * string element (`{t: "s", v: ...}`) to `para` for every `P` node found.
+ *
+ * A `P` node's `textContent` already contains the text of all of its
+ * descendants, so the walk does not descend into a `P` once it has been
+ * captured; otherwise a `P` nested inside it (the HTML parser allows this
+ * inside e.g. `<object>` or `<button>`) would have its text emitted twice.
+ *
+ * @param element - Node whose descendants are scanned; the node itself is not inspected.
+ * @param para - Paragraph that receives the extracted text elements; mutated in place.
+ * @returns The same `para` instance that was passed in.
+ */
+const dfs = (element: ChildNode, para: WJSPara): WJSPara => {
+  element.childNodes.forEach(child => {
+    if (child.nodeName === "P") {
+      para.elts.push({t: "s", v: child.textContent});
+      // textContent covers the whole subtree of this node, so stop here.
+      return;
+    }
+    // Any other node contributes nothing itself; keep looking for P nodes below it.
+    dfs(child, para);
+  });
+  return para;
+}
+
/**
 * Reads an HTML document from a raw buffer.
 *
 * Converts `data` to a string with `Buffer#toString()` (no explicit encoding
 * argument, so Node's default of UTF-8 applies) and delegates to `parse_str`.
 *
 * @param data - Buffer holding the HTML source.
 * @returns The `WJSDoc` produced by `parse_str` for the decoded text.
 */
export function read(data: Buffer): WJSDoc {
return parse_str(data.toString());
}
\ No newline at end of file