diff --git a/apps/chrome-extension/src/common/utils.ts b/apps/chrome-extension/src/common/utils.ts index 28590e6..7c65ef6 100644 --- a/apps/chrome-extension/src/common/utils.ts +++ b/apps/chrome-extension/src/common/utils.ts @@ -213,6 +213,167 @@ export const transformGridToHtml = ( } } +const readMergeInfoFromDom = ( + cellBlockIds: number[], +): { rowSpan: number; colSpan: number }[] | null => { + const result: { rowSpan: number; colSpan: number }[] = [] + let hasAnyMerge = false + let missingCount = 0 + + for (const id of cellBlockIds) { + const td = document.querySelector( + `td[data-block-id="${String(id)}"]`, + ) + + if (!td) { + missingCount++ + result.push({ rowSpan: 1, colSpan: 1 }) + continue + } + + if (td.style.display === 'none') { + result.push({ rowSpan: 0, colSpan: 0 }) + continue + } + + const rowSpan = td.rowSpan + const colSpan = td.colSpan + result.push({ rowSpan, colSpan }) + if (rowSpan > 1 || colSpan > 1) hasAnyMerge = true + } + + if (missingCount === cellBlockIds.length) return null + return hasAnyMerge ? result : null +} + +interface TableDataWithBlockInfo { + recordId?: string + cellBlockIds?: number[] + mergeInfo?: { rowSpan: number; colSpan: number }[] + type?: string +} + +export const resolveMergedTablesFromDom = async ( + tableWithParents: TableWithParent[], +): Promise => { + for (const entry of tableWithParents) { + const table = entry.inner + const data = table.data as TableDataWithBlockInfo | undefined + if (data?.mergeInfo) continue + if (data?.type !== BlockType.TABLE) continue + + const cellBlockIds = data.cellBlockIds + if (!cellBlockIds || cellBlockIds.length === 0) continue + + let mergeInfo = readMergeInfoFromDom(cellBlockIds) + + if (!mergeInfo && data.recordId) { + try { + await waitForFunction( + () => + Docx.locateBlockWithRecordId(data.recordId ?? '').then( + isSuccess => + isSuccess && + document.querySelector( + `td[data-block-id="${String(cellBlockIds[0])}"]`, + ) !== null, + ), + { timeout: 3 * Second }, + ) + mergeInfo = readMergeInfoFromDom(cellBlockIds) + } catch { + continue + } + } + + if (!mergeInfo) continue + + table.data = { ...table.data, mergeInfo } + + const allCells = table.children.flatMap(row => row.children) + if (mergeInfo.length === allCells.length) { + allCells.forEach((cell, i) => { + cell.data = { + ...cell.data, + rowSpan: mergeInfo[i].rowSpan, + colSpan: mergeInfo[i].colSpan, + } + }) + } + } +} + +const wrapConsecutiveImages = (html: string): string => + html.replace(/(]*>)(\s*]*>)+/g, match => { + const imgs = match.match(/]*>/g) + if (!imgs || imgs.length < 2) return match + const ths = imgs.map(img => `${img}`).join('') + return `${ths}
` + }) + +const cellContentToHtml = (cell: mdast.TableCell): string => { + const children = cell.data?.invalidChildren ?? cell.children + if (children.length === 0) return '' + + const paragraph: mdast.Paragraph = { + type: 'paragraph', + children: children as mdast.PhrasingContent[], + } + + const html = toHtml(toHast(paragraph, { allowDangerousHtml: true }), { + allowDangerousHtml: true, + }) + .replace(/^

/, '') + .replace(/<\/p>\s*$/, '') + .trim() + + return wrapConsecutiveImages(html) +} + +export const transformMergedTablesToHtml = ( + mergedTables: TableWithParent[], +): void => { + for (const entry of mergedTables) { + const tableIndex = entry.parent?.children.findIndex( + child => child === entry.inner, + ) + if (tableIndex === undefined || tableIndex === -1) continue + + const table = entry.inner + const rows = table.children + + const lines: string[] = [''] + + for (const row of rows) { + lines.push('') + + for (const cell of row.children) { + const rowSpan = cell.data?.rowSpan ?? 1 + const colSpan = cell.data?.colSpan ?? 1 + + if (rowSpan === 0 || colSpan === 0) continue + + const attrs: string[] = [] + if (rowSpan > 1) attrs.push(`rowspan="${String(rowSpan)}"`) + if (colSpan > 1) attrs.push(`colspan="${String(colSpan)}"`) + + const attrStr = attrs.length > 0 ? ' ' + attrs.join(' ') : '' + const content = cellContentToHtml(cell) + lines.push(`${content}`) + } + + lines.push('') + } + + lines.push('
') + + entry.parent?.children.splice(tableIndex, 1, { + type: 'html', + value: lines.join('\n'), + }) + } +} + export const transformMentionUsers = async ( mentionUsers: mdast.InlineCode[], ): Promise => { @@ -254,6 +415,10 @@ export const transformTableWithParents = ( tableWithParents: TableWithParent[], options: TransformTableWithParentsOptions, ): void => { + transformMergedTablesToHtml( + tableWithParents.filter(item => item.inner.data?.mergeInfo), + ) + if (options.transformGridToHtml) { transformGridToHtml( tableWithParents.filter(item => item.inner.data?.type === BlockType.GRID), diff --git a/apps/chrome-extension/src/scripts/copy-lark-docx-as-markdown.ts b/apps/chrome-extension/src/scripts/copy-lark-docx-as-markdown.ts index 19c9a28..6ca88d0 100644 --- a/apps/chrome-extension/src/scripts/copy-lark-docx-as-markdown.ts +++ b/apps/chrome-extension/src/scripts/copy-lark-docx-as-markdown.ts @@ -8,6 +8,7 @@ import { reportBug } from '../common/issue' import { transformMentionUsers, transformTableWithParents, + resolveMergedTablesFromDom, } from '../common/utils' import { getSettings, @@ -93,6 +94,7 @@ const main = async () => { ) await transformMentionUsers(mentionUsers) + await resolveMergedTablesFromDom(tableWithParents) const tokens = images .map(image => { diff --git a/apps/chrome-extension/src/scripts/download-lark-docx-as-markdown.ts b/apps/chrome-extension/src/scripts/download-lark-docx-as-markdown.ts index 403f218..65d279f 100644 --- a/apps/chrome-extension/src/scripts/download-lark-docx-as-markdown.ts +++ b/apps/chrome-extension/src/scripts/download-lark-docx-as-markdown.ts @@ -14,6 +14,7 @@ import { UniqueFileName, withSignal, transformTableWithParents, + resolveMergedTablesFromDom, } from '../common/utils' import { getSettings, @@ -565,6 +566,7 @@ const main = async (options: { signal?: AbortSignal } = {}) => { }) await transformMentionUsers(mentionUsers) + await resolveMergedTablesFromDom(tableWithParents) const recommendName = docx.pageTitle ? normalizeFileName(docx.pageTitle.slice(0, OneHundred)) diff --git a/apps/chrome-extension/src/scripts/view-lark-docx-as-markdown.ts b/apps/chrome-extension/src/scripts/view-lark-docx-as-markdown.ts index 5fdab0b..6ee5c37 100644 --- a/apps/chrome-extension/src/scripts/view-lark-docx-as-markdown.ts +++ b/apps/chrome-extension/src/scripts/view-lark-docx-as-markdown.ts @@ -8,6 +8,7 @@ import { reportBug } from '../common/issue' import { transformMentionUsers, transformTableWithParents, + resolveMergedTablesFromDom, } from '../common/utils' import { getSettings, @@ -98,6 +99,7 @@ const main = async () => { ) await transformMentionUsers(mentionUsers) + await resolveMergedTablesFromDom(tableWithParents) const tokens = images .map(image => { diff --git a/packages/lark/src/docx.ts b/packages/lark/src/docx.ts index b025fc2..e27fbb8 100644 --- a/packages/lark/src/docx.ts +++ b/packages/lark/src/docx.ts @@ -49,11 +49,16 @@ declare module 'mdast' { type?: BlockType.TABLE | BlockType.GRID colWidths?: number[] invalid?: boolean + mergeInfo?: { rowSpan: number; colSpan: number }[] + recordId?: string + cellBlockIds?: number[] } interface TableCellData { width?: number invalidChildren?: mdast.Nodes[] + rowSpan?: number + colSpan?: number } interface InlineCodeData { @@ -265,12 +270,18 @@ interface ImageBlock extends Block { } } +interface MergeInfo { + row_span: number + col_span: number +} + interface TableBlock extends Block { type: BlockType.TABLE snapshot: { type: BlockType.TABLE rows_id: string[] columns_id: string[] + merge_info?: MergeInfo[] } } @@ -1120,6 +1131,21 @@ export class Transformer { }, ) {} + private resolveMergeInfo( + block: TableBlock | Grid, + ): { rowSpan: number; colSpan: number }[] | undefined { + if (block.type !== BlockType.TABLE) return undefined + + if (block.snapshot.merge_info) { + return block.snapshot.merge_info.map(info => ({ + rowSpan: info.row_span, + colSpan: info.col_span, + })) + } + + return undefined + } + private normalizeImage(image: mdast.Image): mdast.Image | mdast.Paragraph { return this.parent?.type === 'tableCell' ? image @@ -1434,6 +1460,8 @@ export class Transformer { } case BlockType.TABLE: case BlockType.GRID: { + const mergeInfo = this.resolveMergeInfo(block) + let table: mdast.Table = { type: 'table', children: [], @@ -1456,11 +1484,35 @@ export class Transformer { ? widthCells.map(cell => cell.data.width) : undefined + if (mergeInfo && mergeInfo.length === tableCells.length) { + tableCells.forEach((cell, i) => { + cell.data = { + ...cell.data, + rowSpan: mergeInfo[i].rowSpan, + colSpan: mergeInfo[i].colSpan, + } + }) + } + + const hasMergedCells = + mergeInfo?.some(info => info.rowSpan > 1 || info.colSpan > 1) ?? + false + + const cellBlockIds = + block.type === BlockType.TABLE + ? block.children.map(child => child.id) + : undefined + const recordId = + block.type === BlockType.TABLE ? block.record?.id : undefined + table.data = { ...table.data, type: block.type, ...(colWidths ? { colWidths } : {}), invalid: tableCells.some(cell => cell.data?.invalidChildren), + ...(hasMergedCells ? { mergeInfo } : {}), + ...(cellBlockIds ? { cellBlockIds } : {}), + ...(recordId ? { recordId } : {}), } return ( diff --git a/packages/lark/tests/docx.test.ts b/packages/lark/tests/docx.test.ts index 9885ba0..5bb6a79 100644 --- a/packages/lark/tests/docx.test.ts +++ b/packages/lark/tests/docx.test.ts @@ -1467,6 +1467,206 @@ describe('trim end enter', () => { }) }) +describe('table with merge_info', () => { + test('passes mergeInfo to table data when merge_info exists', () => { + const { root, tableWithParents } = transformer.transform({ + type: BlockType.PAGE, + snapshot: { + type: BlockType.PAGE, + }, + children: [ + { + type: BlockType.TABLE, + snapshot: { + type: BlockType.TABLE, + rows_id: ['r1', 'r2'], + columns_id: ['c1', 'c2'], + merge_info: [ + { row_span: 1, col_span: 1 }, + { row_span: 1, col_span: 1 }, + { row_span: 1, col_span: 1 }, + { row_span: 1, col_span: 1 }, + ], + }, + children: [ + { + type: BlockType.CELL, + snapshot: { type: BlockType.CELL }, + zoneState: { + allText: '', + content: { ops: [{ insert: 'A', attributes: {} }] }, + }, + children: [], + }, + { + type: BlockType.CELL, + snapshot: { type: BlockType.CELL }, + zoneState: { + allText: '', + content: { ops: [{ insert: 'B', attributes: {} }] }, + }, + children: [], + }, + { + type: BlockType.CELL, + snapshot: { type: BlockType.CELL }, + zoneState: { + allText: '', + content: { ops: [{ insert: 'C', attributes: {} }] }, + }, + children: [], + }, + { + type: BlockType.CELL, + snapshot: { type: BlockType.CELL }, + zoneState: { + allText: '', + content: { ops: [{ insert: 'D', attributes: {} }] }, + }, + children: [], + }, + ], + }, + ], + }) + + expect(tableWithParents).toHaveLength(1) + expect(root.children).toHaveLength(1) + + const table = tableWithParents[0].inner + expect(table.data?.mergeInfo).toBeUndefined() + expect(table.children).toHaveLength(2) + expect(table.children[0].children).toHaveLength(2) + expect(table.children[1].children).toHaveLength(2) + }) + + test('sets mergeInfo on table data when cells have rowspan/colspan > 1', () => { + const { tableWithParents } = transformer.transform({ + type: BlockType.PAGE, + snapshot: { + type: BlockType.PAGE, + }, + children: [ + { + type: BlockType.TABLE, + snapshot: { + type: BlockType.TABLE, + rows_id: ['r1', 'r2'], + columns_id: ['c1', 'c2'], + merge_info: [ + { row_span: 2, col_span: 1 }, + { row_span: 1, col_span: 1 }, + { row_span: 0, col_span: 0 }, + { row_span: 1, col_span: 1 }, + ], + }, + children: [ + { + type: BlockType.CELL, + snapshot: { type: BlockType.CELL }, + zoneState: { + allText: '', + content: { ops: [{ insert: 'merged', attributes: {} }] }, + }, + children: [], + }, + { + type: BlockType.CELL, + snapshot: { type: BlockType.CELL }, + zoneState: { + allText: '', + content: { ops: [{ insert: 'B', attributes: {} }] }, + }, + children: [], + }, + { + type: BlockType.CELL, + snapshot: { type: BlockType.CELL }, + zoneState: { + allText: '', + content: { ops: [] }, + }, + children: [], + }, + { + type: BlockType.CELL, + snapshot: { type: BlockType.CELL }, + zoneState: { + allText: '', + content: { ops: [{ insert: 'D', attributes: {} }] }, + }, + children: [], + }, + ], + }, + ], + }) + + expect(tableWithParents).toHaveLength(1) + + const table = tableWithParents[0].inner + expect(table.type).toBe('table') + expect(table.data?.mergeInfo).toStrictEqual([ + { rowSpan: 2, colSpan: 1 }, + { rowSpan: 1, colSpan: 1 }, + { rowSpan: 0, colSpan: 0 }, + { rowSpan: 1, colSpan: 1 }, + ]) + + const cell0 = table.children[0].children[0] + expect(cell0.data?.rowSpan).toBe(2) + expect(cell0.data?.colSpan).toBe(1) + + const cell2 = table.children[1].children[0] + expect(cell2.data?.rowSpan).toBe(0) + expect(cell2.data?.colSpan).toBe(0) + }) + + test('no mergeInfo when merge_info is absent from snapshot', () => { + const { tableWithParents } = transformer.transform({ + type: BlockType.PAGE, + snapshot: { + type: BlockType.PAGE, + }, + children: [ + { + type: BlockType.TABLE, + snapshot: { + type: BlockType.TABLE, + rows_id: ['r1'], + columns_id: ['c1', 'c2'], + }, + children: [ + { + type: BlockType.CELL, + snapshot: { type: BlockType.CELL }, + zoneState: { + allText: '', + content: { ops: [{ insert: 'A', attributes: {} }] }, + }, + children: [], + }, + { + type: BlockType.CELL, + snapshot: { type: BlockType.CELL }, + zoneState: { + allText: '', + content: { ops: [{ insert: 'B', attributes: {} }] }, + }, + children: [], + }, + ], + }, + ], + }) + + expect(tableWithParents).toHaveLength(1) + const table = tableWithParents[0].inner + expect(table.type).toBe('table') + expect(table.data?.mergeInfo).toBeUndefined() + }) +}) + describe('inline math', () => { test('inline equation with a single character', () => { expect(