From 91b80342f69559b67666b3edcb217faf5d0ca57e Mon Sep 17 00:00:00 2001 From: phoenix Date: Tue, 24 Mar 2026 15:49:32 +0800 Subject: [PATCH 1/3] feat(lark, chrome-extension): support merged table cells with rowspan/colspan HTML output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GFM markdown tables cannot represent merged cells. When a Lark table contains rowspan/colspan merges, it is now converted to an HTML with proper attributes instead. Merge info is resolved by reading rowspan/colspan from the rendered DOM ') - } else { - lines.push(`${content}`) - } + const content = cellContentToHtml(cell) + lines.push(`${content}`) } lines.push('') From a5d8034b031c55a24d4c714e492ea5db1cdbef8f Mon Sep 17 00:00:00 2001 From: phoenix Date: Tue, 24 Mar 2026 19:07:57 +0800 Subject: [PATCH 3/3] fix(chrome-extension): wrap consecutive images in horizontal table layout When a merged table cell contains multiple consecutive images, wrap them in a
elements. For tables not in the viewport (virtual scrolling), the extension scrolls them into view via locateBlockWithRecordId before reading — following the same pattern as transformMentionUsers. --- apps/chrome-extension/src/common/utils.ts | 175 +++++++++++++++ .../src/scripts/copy-lark-docx-as-markdown.ts | 2 + .../scripts/download-lark-docx-as-markdown.ts | 2 + .../src/scripts/view-lark-docx-as-markdown.ts | 2 + packages/lark/src/docx.ts | 52 +++++ packages/lark/tests/docx.test.ts | 200 ++++++++++++++++++ 6 files changed, 433 insertions(+) diff --git a/apps/chrome-extension/src/common/utils.ts b/apps/chrome-extension/src/common/utils.ts index 28590e6..df8e1db 100644 --- a/apps/chrome-extension/src/common/utils.ts +++ b/apps/chrome-extension/src/common/utils.ts @@ -213,6 +213,177 @@ export const transformGridToHtml = ( } } +const readMergeInfoFromDom = ( + cellBlockIds: number[], +): { rowSpan: number; colSpan: number }[] | null => { + const result: { rowSpan: number; colSpan: number }[] = [] + let hasAnyMerge = false + let missingCount = 0 + + for (const id of cellBlockIds) { + const td = document.querySelector( + `td[data-block-id="${String(id)}"]`, + ) + + if (!td) { + missingCount++ + result.push({ rowSpan: 1, colSpan: 1 }) + continue + } + + if (td.style.display === 'none') { + result.push({ rowSpan: 0, colSpan: 0 }) + continue + } + + const rowSpan = td.rowSpan + const colSpan = td.colSpan + result.push({ rowSpan, colSpan }) + if (rowSpan > 1 || colSpan > 1) hasAnyMerge = true + } + + if (missingCount === cellBlockIds.length) return null + return hasAnyMerge ? result : null +} + +interface TableDataWithBlockInfo { + recordId?: string + cellBlockIds?: number[] + mergeInfo?: { rowSpan: number; colSpan: number }[] + type?: string +} + +export const resolveMergedTablesFromDom = async ( + tableWithParents: TableWithParent[], +): Promise => { + for (const entry of tableWithParents) { + const table = entry.inner + const data = table.data as TableDataWithBlockInfo | undefined + if (data?.mergeInfo) continue + if (data?.type !== BlockType.TABLE) continue + + const cellBlockIds = data.cellBlockIds + if (!cellBlockIds || cellBlockIds.length === 0) continue + + let mergeInfo = readMergeInfoFromDom(cellBlockIds) + + if (!mergeInfo && data.recordId) { + try { + await waitForFunction( + () => + Docx.locateBlockWithRecordId(data.recordId ?? '').then( + isSuccess => + isSuccess && + document.querySelector( + `td[data-block-id="${String(cellBlockIds[0])}"]`, + ) !== null, + ), + { timeout: 3 * Second }, + ) + mergeInfo = readMergeInfoFromDom(cellBlockIds) + } catch { + continue + } + } + + if (!mergeInfo) continue + + table.data = { ...table.data, mergeInfo } + + const allCells = table.children.flatMap(row => row.children) + if (mergeInfo.length === allCells.length) { + allCells.forEach((cell, i) => { + cell.data = { + ...cell.data, + rowSpan: mergeInfo[i].rowSpan, + colSpan: mergeInfo[i].colSpan, + } + }) + } + } +} + +const cellContentToMarkdown = (cell: mdast.TableCell): string => { + const children = cell.data?.invalidChildren ?? cell.children + if (children.length === 0) return '' + + const root: mdast.Root = { + type: 'root', + children: children.map(child => { + if ( + child.type === 'text' || + child.type === 'emphasis' || + child.type === 'strong' || + child.type === 'inlineCode' || + child.type === 'delete' || + child.type === 'link' || + child.type === 'image' || + child.type === 'html' || + child.type === 'break' + ) { + return { type: 'paragraph', children: [child] } as mdast.Paragraph + } + return child as mdast.RootContent + }), + } + + return Docx.stringify(root).trim() +} + +export const transformMergedTablesToHtml = ( + mergedTables: TableWithParent[], +): void => { + for (const entry of mergedTables) { + const tableIndex = entry.parent?.children.findIndex( + child => child === entry.inner, + ) + if (tableIndex === undefined || tableIndex === -1) continue + + const table = entry.inner + const rows = table.children + + const lines: string[] = [''] + + for (const row of rows) { + lines.push('') + + for (const cell of row.children) { + const rowSpan = cell.data?.rowSpan ?? 1 + const colSpan = cell.data?.colSpan ?? 1 + + if (rowSpan === 0 || colSpan === 0) continue + + const attrs: string[] = [] + if (rowSpan > 1) attrs.push(`rowspan="${String(rowSpan)}"`) + if (colSpan > 1) attrs.push(`colspan="${String(colSpan)}"`) + + const attrStr = attrs.length > 0 ? ' ' + attrs.join(' ') : '' + const content = cellContentToMarkdown(cell) + const isMultiline = content.includes('\n') + + if (isMultiline) { + lines.push(``) + lines.push('') + lines.push(content) + lines.push('') + lines.push('') + } else { + lines.push(`${content}`) + } + } + + lines.push('') + } + + lines.push('
') + + entry.parent?.children.splice(tableIndex, 1, { + type: 'html', + value: lines.join('\n'), + }) + } +} + export const transformMentionUsers = async ( mentionUsers: mdast.InlineCode[], ): Promise => { @@ -254,6 +425,10 @@ export const transformTableWithParents = ( tableWithParents: TableWithParent[], options: TransformTableWithParentsOptions, ): void => { + transformMergedTablesToHtml( + tableWithParents.filter(item => item.inner.data?.mergeInfo), + ) + if (options.transformGridToHtml) { transformGridToHtml( tableWithParents.filter(item => item.inner.data?.type === BlockType.GRID), diff --git a/apps/chrome-extension/src/scripts/copy-lark-docx-as-markdown.ts b/apps/chrome-extension/src/scripts/copy-lark-docx-as-markdown.ts index 19c9a28..6ca88d0 100644 --- a/apps/chrome-extension/src/scripts/copy-lark-docx-as-markdown.ts +++ b/apps/chrome-extension/src/scripts/copy-lark-docx-as-markdown.ts @@ -8,6 +8,7 @@ import { reportBug } from '../common/issue' import { transformMentionUsers, transformTableWithParents, + resolveMergedTablesFromDom, } from '../common/utils' import { getSettings, @@ -93,6 +94,7 @@ const main = async () => { ) await transformMentionUsers(mentionUsers) + await resolveMergedTablesFromDom(tableWithParents) const tokens = images .map(image => { diff --git a/apps/chrome-extension/src/scripts/download-lark-docx-as-markdown.ts b/apps/chrome-extension/src/scripts/download-lark-docx-as-markdown.ts index 403f218..65d279f 100644 --- a/apps/chrome-extension/src/scripts/download-lark-docx-as-markdown.ts +++ b/apps/chrome-extension/src/scripts/download-lark-docx-as-markdown.ts @@ -14,6 +14,7 @@ import { UniqueFileName, withSignal, transformTableWithParents, + resolveMergedTablesFromDom, } from '../common/utils' import { getSettings, @@ -565,6 +566,7 @@ const main = async (options: { signal?: AbortSignal } = {}) => { }) await transformMentionUsers(mentionUsers) + await resolveMergedTablesFromDom(tableWithParents) const recommendName = docx.pageTitle ? normalizeFileName(docx.pageTitle.slice(0, OneHundred)) diff --git a/apps/chrome-extension/src/scripts/view-lark-docx-as-markdown.ts b/apps/chrome-extension/src/scripts/view-lark-docx-as-markdown.ts index 5fdab0b..6ee5c37 100644 --- a/apps/chrome-extension/src/scripts/view-lark-docx-as-markdown.ts +++ b/apps/chrome-extension/src/scripts/view-lark-docx-as-markdown.ts @@ -8,6 +8,7 @@ import { reportBug } from '../common/issue' import { transformMentionUsers, transformTableWithParents, + resolveMergedTablesFromDom, } from '../common/utils' import { getSettings, @@ -98,6 +99,7 @@ const main = async () => { ) await transformMentionUsers(mentionUsers) + await resolveMergedTablesFromDom(tableWithParents) const tokens = images .map(image => { diff --git a/packages/lark/src/docx.ts b/packages/lark/src/docx.ts index b025fc2..e27fbb8 100644 --- a/packages/lark/src/docx.ts +++ b/packages/lark/src/docx.ts @@ -49,11 +49,16 @@ declare module 'mdast' { type?: BlockType.TABLE | BlockType.GRID colWidths?: number[] invalid?: boolean + mergeInfo?: { rowSpan: number; colSpan: number }[] + recordId?: string + cellBlockIds?: number[] } interface TableCellData { width?: number invalidChildren?: mdast.Nodes[] + rowSpan?: number + colSpan?: number } interface InlineCodeData { @@ -265,12 +270,18 @@ interface ImageBlock extends Block { } } +interface MergeInfo { + row_span: number + col_span: number +} + interface TableBlock extends Block { type: BlockType.TABLE snapshot: { type: BlockType.TABLE rows_id: string[] columns_id: string[] + merge_info?: MergeInfo[] } } @@ -1120,6 +1131,21 @@ export class Transformer { }, ) {} + private resolveMergeInfo( + block: TableBlock | Grid, + ): { rowSpan: number; colSpan: number }[] | undefined { + if (block.type !== BlockType.TABLE) return undefined + + if (block.snapshot.merge_info) { + return block.snapshot.merge_info.map(info => ({ + rowSpan: info.row_span, + colSpan: info.col_span, + })) + } + + return undefined + } + private normalizeImage(image: mdast.Image): mdast.Image | mdast.Paragraph { return this.parent?.type === 'tableCell' ? image @@ -1434,6 +1460,8 @@ export class Transformer { } case BlockType.TABLE: case BlockType.GRID: { + const mergeInfo = this.resolveMergeInfo(block) + let table: mdast.Table = { type: 'table', children: [], @@ -1456,11 +1484,35 @@ export class Transformer { ? widthCells.map(cell => cell.data.width) : undefined + if (mergeInfo && mergeInfo.length === tableCells.length) { + tableCells.forEach((cell, i) => { + cell.data = { + ...cell.data, + rowSpan: mergeInfo[i].rowSpan, + colSpan: mergeInfo[i].colSpan, + } + }) + } + + const hasMergedCells = + mergeInfo?.some(info => info.rowSpan > 1 || info.colSpan > 1) ?? + false + + const cellBlockIds = + block.type === BlockType.TABLE + ? block.children.map(child => child.id) + : undefined + const recordId = + block.type === BlockType.TABLE ? block.record?.id : undefined + table.data = { ...table.data, type: block.type, ...(colWidths ? { colWidths } : {}), invalid: tableCells.some(cell => cell.data?.invalidChildren), + ...(hasMergedCells ? { mergeInfo } : {}), + ...(cellBlockIds ? { cellBlockIds } : {}), + ...(recordId ? { recordId } : {}), } return ( diff --git a/packages/lark/tests/docx.test.ts b/packages/lark/tests/docx.test.ts index 9885ba0..5bb6a79 100644 --- a/packages/lark/tests/docx.test.ts +++ b/packages/lark/tests/docx.test.ts @@ -1467,6 +1467,206 @@ describe('trim end enter', () => { }) }) +describe('table with merge_info', () => { + test('passes mergeInfo to table data when merge_info exists', () => { + const { root, tableWithParents } = transformer.transform({ + type: BlockType.PAGE, + snapshot: { + type: BlockType.PAGE, + }, + children: [ + { + type: BlockType.TABLE, + snapshot: { + type: BlockType.TABLE, + rows_id: ['r1', 'r2'], + columns_id: ['c1', 'c2'], + merge_info: [ + { row_span: 1, col_span: 1 }, + { row_span: 1, col_span: 1 }, + { row_span: 1, col_span: 1 }, + { row_span: 1, col_span: 1 }, + ], + }, + children: [ + { + type: BlockType.CELL, + snapshot: { type: BlockType.CELL }, + zoneState: { + allText: '', + content: { ops: [{ insert: 'A', attributes: {} }] }, + }, + children: [], + }, + { + type: BlockType.CELL, + snapshot: { type: BlockType.CELL }, + zoneState: { + allText: '', + content: { ops: [{ insert: 'B', attributes: {} }] }, + }, + children: [], + }, + { + type: BlockType.CELL, + snapshot: { type: BlockType.CELL }, + zoneState: { + allText: '', + content: { ops: [{ insert: 'C', attributes: {} }] }, + }, + children: [], + }, + { + type: BlockType.CELL, + snapshot: { type: BlockType.CELL }, + zoneState: { + allText: '', + content: { ops: [{ insert: 'D', attributes: {} }] }, + }, + children: [], + }, + ], + }, + ], + }) + + expect(tableWithParents).toHaveLength(1) + expect(root.children).toHaveLength(1) + + const table = tableWithParents[0].inner + expect(table.data?.mergeInfo).toBeUndefined() + expect(table.children).toHaveLength(2) + expect(table.children[0].children).toHaveLength(2) + expect(table.children[1].children).toHaveLength(2) + }) + + test('sets mergeInfo on table data when cells have rowspan/colspan > 1', () => { + const { tableWithParents } = transformer.transform({ + type: BlockType.PAGE, + snapshot: { + type: BlockType.PAGE, + }, + children: [ + { + type: BlockType.TABLE, + snapshot: { + type: BlockType.TABLE, + rows_id: ['r1', 'r2'], + columns_id: ['c1', 'c2'], + merge_info: [ + { row_span: 2, col_span: 1 }, + { row_span: 1, col_span: 1 }, + { row_span: 0, col_span: 0 }, + { row_span: 1, col_span: 1 }, + ], + }, + children: [ + { + type: BlockType.CELL, + snapshot: { type: BlockType.CELL }, + zoneState: { + allText: '', + content: { ops: [{ insert: 'merged', attributes: {} }] }, + }, + children: [], + }, + { + type: BlockType.CELL, + snapshot: { type: BlockType.CELL }, + zoneState: { + allText: '', + content: { ops: [{ insert: 'B', attributes: {} }] }, + }, + children: [], + }, + { + type: BlockType.CELL, + snapshot: { type: BlockType.CELL }, + zoneState: { + allText: '', + content: { ops: [] }, + }, + children: [], + }, + { + type: BlockType.CELL, + snapshot: { type: BlockType.CELL }, + zoneState: { + allText: '', + content: { ops: [{ insert: 'D', attributes: {} }] }, + }, + children: [], + }, + ], + }, + ], + }) + + expect(tableWithParents).toHaveLength(1) + + const table = tableWithParents[0].inner + expect(table.type).toBe('table') + expect(table.data?.mergeInfo).toStrictEqual([ + { rowSpan: 2, colSpan: 1 }, + { rowSpan: 1, colSpan: 1 }, + { rowSpan: 0, colSpan: 0 }, + { rowSpan: 1, colSpan: 1 }, + ]) + + const cell0 = table.children[0].children[0] + expect(cell0.data?.rowSpan).toBe(2) + expect(cell0.data?.colSpan).toBe(1) + + const cell2 = table.children[1].children[0] + expect(cell2.data?.rowSpan).toBe(0) + expect(cell2.data?.colSpan).toBe(0) + }) + + test('no mergeInfo when merge_info is absent from snapshot', () => { + const { tableWithParents } = transformer.transform({ + type: BlockType.PAGE, + snapshot: { + type: BlockType.PAGE, + }, + children: [ + { + type: BlockType.TABLE, + snapshot: { + type: BlockType.TABLE, + rows_id: ['r1'], + columns_id: ['c1', 'c2'], + }, + children: [ + { + type: BlockType.CELL, + snapshot: { type: BlockType.CELL }, + zoneState: { + allText: '', + content: { ops: [{ insert: 'A', attributes: {} }] }, + }, + children: [], + }, + { + type: BlockType.CELL, + snapshot: { type: BlockType.CELL }, + zoneState: { + allText: '', + content: { ops: [{ insert: 'B', attributes: {} }] }, + }, + children: [], + }, + ], + }, + ], + }) + + expect(tableWithParents).toHaveLength(1) + const table = tableWithParents[0].inner + expect(table.type).toBe('table') + expect(table.data?.mergeInfo).toBeUndefined() + }) +}) + describe('inline math', () => { test('inline equation with a single character', () => { expect( From 2adf832a2a37f126a0ee48fda8fde5fa5d1dd163 Mon Sep 17 00:00:00 2001 From: phoenix Date: Tue, 24 Mar 2026 17:57:40 +0800 Subject: [PATCH 2/3] fix(chrome-extension): render merged table cell content as HTML instead of markdown Markdown syntax like **bold** inside
tags is not parsed by renderers. Convert cell content to HTML (e.g. ) via the mdast-to-hast-to-html pipeline so formatting is preserved. --- apps/chrome-extension/src/common/utils.ts | 44 +++++++---------------- 1 file changed, 12 insertions(+), 32 deletions(-) diff --git a/apps/chrome-extension/src/common/utils.ts b/apps/chrome-extension/src/common/utils.ts index df8e1db..ae6444f 100644 --- a/apps/chrome-extension/src/common/utils.ts +++ b/apps/chrome-extension/src/common/utils.ts @@ -303,31 +303,21 @@ export const resolveMergedTablesFromDom = async ( } } -const cellContentToMarkdown = (cell: mdast.TableCell): string => { +const cellContentToHtml = (cell: mdast.TableCell): string => { const children = cell.data?.invalidChildren ?? cell.children if (children.length === 0) return '' - const root: mdast.Root = { - type: 'root', - children: children.map(child => { - if ( - child.type === 'text' || - child.type === 'emphasis' || - child.type === 'strong' || - child.type === 'inlineCode' || - child.type === 'delete' || - child.type === 'link' || - child.type === 'image' || - child.type === 'html' || - child.type === 'break' - ) { - return { type: 'paragraph', children: [child] } as mdast.Paragraph - } - return child as mdast.RootContent - }), + const paragraph: mdast.Paragraph = { + type: 'paragraph', + children: children as mdast.PhrasingContent[], } - return Docx.stringify(root).trim() + return toHtml(toHast(paragraph, { allowDangerousHtml: true }), { + allowDangerousHtml: true, + }) + .replace(/^

/, '') + .replace(/<\/p>\s*$/, '') + .trim() } export const transformMergedTablesToHtml = ( @@ -358,18 +348,8 @@ export const transformMergedTablesToHtml = ( if (colSpan > 1) attrs.push(`colspan="${String(colSpan)}"`) const attrStr = attrs.length > 0 ? ' ' + attrs.join(' ') : '' - const content = cellContentToMarkdown(cell) - const isMultiline = content.includes('\n') - - if (isMultiline) { - lines.push(``) - lines.push('') - lines.push(content) - lines.push('') - lines.push('

`).join('') + return `
structure for horizontal display, matching the behavior of the original plugin's invalid table HTML output. --- apps/chrome-extension/src/common/utils.ts | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/apps/chrome-extension/src/common/utils.ts b/apps/chrome-extension/src/common/utils.ts index ae6444f..7c65ef6 100644 --- a/apps/chrome-extension/src/common/utils.ts +++ b/apps/chrome-extension/src/common/utils.ts @@ -303,6 +303,14 @@ export const resolveMergedTablesFromDom = async ( } } +const wrapConsecutiveImages = (html: string): string => + html.replace(/(]*>)(\s*]*>)+/g, match => { + const imgs = match.match(/]*>/g) + if (!imgs || imgs.length < 2) return match + const ths = imgs.map(img => `${img}
${ths}
` + }) + const cellContentToHtml = (cell: mdast.TableCell): string => { const children = cell.data?.invalidChildren ?? cell.children if (children.length === 0) return '' @@ -312,12 +320,14 @@ const cellContentToHtml = (cell: mdast.TableCell): string => { children: children as mdast.PhrasingContent[], } - return toHtml(toHast(paragraph, { allowDangerousHtml: true }), { + const html = toHtml(toHast(paragraph, { allowDangerousHtml: true }), { allowDangerousHtml: true, }) .replace(/^

/, '') .replace(/<\/p>\s*$/, '') .trim() + + return wrapConsecutiveImages(html) } export const transformMergedTablesToHtml = (