diff --git a/index.js b/index.js index 1713a3b..ed0884e 100644 --- a/index.js +++ b/index.js @@ -435,6 +435,10 @@ class LLMWhispererClientV2 { * @param {string} [options.useWebhook=''] - Whether to use a webhook. * @param {boolean} [options.waitForCompletion=false] - Whether to wait for completion. * @param {number} [options.waitTimeout=180] - The timeout for waiting. + * @param {boolean} [options.addLineNos=false] - If true, adds line numbers to the extracted text + * and saves line metadata, which can be queried later + * using the highlights API. + * @returns {Promise} The response from the whisper API. * @throws {LLMWhispererClientException} If there is an error in the request. */ @@ -459,6 +463,7 @@ class LLMWhispererClientV2 { useWebhook = "", waitForCompletion = false, waitTimeout = 180, + addLineNos = false, } = {}) { this.logger.debug("whisper called"); const apiUrl = `${this.baseUrl}/whisper`; @@ -482,6 +487,7 @@ class LLMWhispererClientV2 { use_webhook: useWebhook, wait_for_completion: waitForCompletion, wait_timeout: waitTimeout, + add_line_nos: addLineNos, }; this.logger.debug(`api_url: ${apiUrl}`); @@ -741,6 +747,55 @@ class LLMWhispererClientV2 { }; } } + + /** + * Retrieves the highlight information of the LLMWhisperer API. + * + * This method sends a GET request to the '/highlights' endpoint of the LLMWhisperer API. + * The response is a JSON object containing the usage information. + * Refer to https://docs.unstract.com/llm_whisperer/apis/llm_whisperer_usage_api + * + * @param {string} whisperHash - The hash of the whisper operation. + * @param {string} lines - Define which lines metadata to retrieve. + * Example "1-5,7,21-" retrieves lines 1,2,3,4,5,7,21,22,23,... + * @param {boolean} [extractAllLines=false] - If true, extract all lines. + * @returns {Promise} A promise that resolves with the highlight information. + * @throws {LLMWhispererClientException} If the API request fails. + */ + async getHighlightData(whisperHash, lines, extractAllLines = false) { + this.logger.debug("highlight called"); + const url = `${this.baseUrl}/highlights`; + + // Build query parameters + const params = { + whisper_hash: whisperHash, + lines: lines, + extract_all_lines: extractAllLines, + }; + + try { + const response = await axios(url, { + method: "GET", + headers: this.headers, + params: params, + }); + + if (response.status != 200) { + // Parse error response and throw a custom exception + const errorData = await response.data; + errorData.status_code = response.status; + throw new LLMWhispererClientException(errorData); + } + + return response.data; + } catch (error) { + const err = error.response + ? error.response.data + : { message: error.message }; + err.statusCode = error.response ? error.response.status : -1; + throw new LLMWhispererClientException(err.message, err.statusCode); + } + } } module.exports = { diff --git a/package.json b/package.json index 4a1292c..08abac5 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "llmwhisperer-client", - "version": "2.1.0", + "version": "2.2.0", "description": "LLMWhisper JS Client", "main": "index.js", "scripts": { diff --git a/test/test.js b/test/test.js index 72f9101..b2a8e4b 100644 --- a/test/test.js +++ b/test/test.js @@ -63,5 +63,48 @@ describe("LLMWhispererClientV2", () => { }, 200000, ); + + test("highlight", async () => { + const dataDir = path.join(__dirname, "data"); + const inputFile = "credit_card.pdf"; + const filePath = path.join(dataDir, inputFile); + + // Call whisper API with line numbers enabled + const whisperResult = await client.whisper({ + addLineNos: true, + filePath: filePath, + waitForCompletion: true, + }); + + const whisperHash = whisperResult.whisper_hash; + + // Fetch highlight data for lines 1-2 + const highlightData = await client.getHighlightData(whisperHash, "1-2"); + + // Validate the response structure + expect(typeof highlightData).toBe("object"); + expect(Object.keys(highlightData).length).toBe(2); + expect(highlightData).toHaveProperty("1"); + expect(highlightData).toHaveProperty("2"); + + // Validate line 1 data + const line1 = highlightData["1"]; + expect(line1.base_y).toBe(0); + expect(line1.base_y_percent).toBe(0); + expect(line1.height).toBe(0); + expect(line1.height_percent).toBe(0); + expect(line1.page).toBe(0); + expect(line1.page_height).toBe(0); + expect(line1.raw).toEqual([0, 0, 0, 0]); + + // Validate line 2 data + const line2 = highlightData["2"]; + expect(line2.base_y).toBe(155); + expect(line2.base_y_percent).toBeCloseTo(4.8927, 4); // Approximate float comparison + expect(line2.height).toBe(51); + expect(line2.height_percent).toBeCloseTo(1.6098, 4); // Approximate float comparison + expect(line2.page).toBe(0); + expect(line2.page_height).toBe(3168); + }, 20000); // 20-second timeout });