From 44a5c6c0514098ffd3e6a00351241d15bcc7aa6b Mon Sep 17 00:00:00 2001 From: jagadeeswaran-zipstack Date: Tue, 25 Feb 2025 01:36:16 +0530 Subject: [PATCH 1/4] added support Highlight API --- index.js | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++ test/test.js | 43 ++++++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+) diff --git a/index.js b/index.js index 1713a3b..a7d0ce8 100644 --- a/index.js +++ b/index.js @@ -435,6 +435,10 @@ class LLMWhispererClientV2 { * @param {string} [options.useWebhook=''] - Whether to use a webhook. * @param {boolean} [options.waitForCompletion=false] - Whether to wait for completion. * @param {number} [options.waitTimeout=180] - The timeout for waiting. + * @param {boolean} [addLineNos=false] - If true, adds line numbers to the extracted text + * and saves line metadata, which can be queried later + * using the highlights API. + * @returns {Promise} The response from the whisper API. * @throws {LLMWhispererClientException} If there is an error in the request. */ @@ -459,6 +463,7 @@ class LLMWhispererClientV2 { useWebhook = "", waitForCompletion = false, waitTimeout = 180, + addLineNos = false, } = {}) { this.logger.debug("whisper called"); const apiUrl = `${this.baseUrl}/whisper`; @@ -482,6 +487,7 @@ class LLMWhispererClientV2 { use_webhook: useWebhook, wait_for_completion: waitForCompletion, wait_timeout: waitTimeout, + add_line_nos: addLineNos, }; this.logger.debug(`api_url: ${apiUrl}`); @@ -741,6 +747,65 @@ class LLMWhispererClientV2 { }; } } + + /** + * Retrieves highlight data (line metadata) from the LLMWhisperer API. + * + * This method sends a GET request to the '/highlights' endpoint of the LLMWhisperer API. + * The response is a JSON object containing the metadata of the requested lines. + * Refer to https://docs.unstract.com/llm_whisperer/apis/llm_whisperer_usage_api + * + * @param {string} whisperHash - The hash of the whisper operation. + * @param {string} lines - Specifies which lines' metadata to retrieve. 
+ * Example "1-5,7,21-" retrieves lines 1,2,3,4,5,7,21,22,23,... + * @param {boolean} [extractAllLines=false] - If true, extract all lines. + * @returns {Promise} A promise that resolves with the highlight information. + * @throws {LLMWhispererClientException} If the API request fails. + */ + async getHighlightData(whisperHash, lines, extractAllLines = false) { + console.debug("highlight called"); + const url = `${this.baseUrl}/highlights`; + + // Build query parameters + const params = new URLSearchParams({ + whisper_hash: whisperHash, + lines: lines, + extract_all_lines: extractAllLines.toString(), + }); + const finalUrl = `${url}?${params.toString()}`; + console.debug("url:", finalUrl); + + // Set up the AbortController for timeout support + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), this.apiTimeout); + + try { + const response = await fetch(finalUrl, { + method: "GET", + headers: this.headers, + signal: controller.signal, + }); + clearTimeout(timeoutId); + + if (!response.ok) { + // Parse error response and throw a custom exception + const errorData = await response.json(); + errorData.status_code = response.status; + throw new LLMWhispererClientException(errorData); + } + + return await response.json(); + } catch (error) { + if (error.name === "AbortError") { + // Handle request timeout + throw new LLMWhispererClientException({ + message: "Request timed out", + status_code: 408, + }); + } + throw error; + } + } } module.exports = { diff --git a/test/test.js b/test/test.js index 72f9101..b2a8e4b 100644 --- a/test/test.js +++ b/test/test.js @@ -63,5 +63,48 @@ describe("LLMWhispererClientV2", () => { }, 200000, ); + + test("highlight", async () => { + const dataDir = path.join(__dirname, "data"); + const inputFile = "credit_card.pdf"; + const filePath = path.join(dataDir, inputFile); + + // Call whisper API with line numbers enabled + const whisperResult = await client.whisper({ + addLineNos: true, + 
filePath: filePath, + waitForCompletion: true, + }); + + const whisperHash = whisperResult.whisper_hash; + + // Fetch highlight data for lines 1-2 + const highlightData = await client.getHighlightData(whisperHash, "1-2"); + + // Validate the response structure + expect(typeof highlightData).toBe("object"); + expect(Object.keys(highlightData).length).toBe(2); + expect(highlightData).toHaveProperty("1"); + expect(highlightData).toHaveProperty("2"); + + // Validate line 1 data + const line1 = highlightData["1"]; + expect(line1.base_y).toBe(0); + expect(line1.base_y_percent).toBe(0); + expect(line1.height).toBe(0); + expect(line1.height_percent).toBe(0); + expect(line1.page).toBe(0); + expect(line1.page_height).toBe(0); + expect(line1.raw).toEqual([0, 0, 0, 0]); + + // Validate line 2 data + const line2 = highlightData["2"]; + expect(line2.base_y).toBe(155); + expect(line2.base_y_percent).toBeCloseTo(4.8927, 4); // Approximate float comparison + expect(line2.height).toBe(51); + expect(line2.height_percent).toBeCloseTo(1.6098, 4); // Approximate float comparison + expect(line2.page).toBe(0); + expect(line2.page_height).toBe(3168); + }, 20000); // 20-second timeout }); From 7d10d2851039a99a0a3ea26ff3aa9bc60b4778db Mon Sep 17 00:00:00 2001 From: jagadeeswaran-zipstack Date: Tue, 25 Feb 2025 15:15:49 +0530 Subject: [PATCH 2/4] fixed status code error check --- index.js | 40 +++++++++++++++------------------------- 1 file changed, 15 insertions(+), 25 deletions(-) diff --git a/index.js b/index.js index a7d0ce8..ed0884e 100644 --- a/index.js +++ b/index.js @@ -435,7 +435,7 @@ class LLMWhispererClientV2 { * @param {string} [options.useWebhook=''] - Whether to use a webhook. * @param {boolean} [options.waitForCompletion=false] - Whether to wait for completion. * @param {number} [options.waitTimeout=180] - The timeout for waiting. 
- * @param {boolean} [addLineNos=false] - If true, adds line numbers to the extracted text + * @param {boolean} [options.addLineNos=false] - If true, adds line numbers to the extracted text * and saves line metadata, which can be queried later * using the highlights API. @@ -763,47 +763,37 @@ class LLMWhispererClientV2 { * @throws {LLMWhispererClientException} If the API request fails. */ async getHighlightData(whisperHash, lines, extractAllLines = false) { - console.debug("highlight called"); + this.logger.debug("highlight called"); const url = `${this.baseUrl}/highlights`; // Build query parameters - const params = new URLSearchParams({ + const params = { whisper_hash: whisperHash, lines: lines, - extract_all_lines: extractAllLines.toString(), - }); - const finalUrl = `${url}?${params.toString()}`; - console.debug("url:", finalUrl); - - // Set up the AbortController for timeout support - const controller = new AbortController(); - const timeoutId = setTimeout(() => controller.abort(), this.apiTimeout); + extract_all_lines: extractAllLines, + }; try { - const response = await fetch(finalUrl, { + const response = await axios(url, { method: "GET", headers: this.headers, - signal: controller.signal, + params: params, }); - clearTimeout(timeoutId); - if (!response.ok) { + if (response.status != 200) { // Parse error response and throw a custom exception - const errorData = await response.json(); + const errorData = await response.data; errorData.status_code = response.status; throw new LLMWhispererClientException(errorData); } - return await response.json(); + return response.data; } catch (error) { - if (error.name === "AbortError") { - // Handle request timeout - throw new LLMWhispererClientException({ - message: "Request timed out", - status_code: 408, - }); - } - throw error; + const err = error.response + ? error.response.data + : { message: error.message }; + err.statusCode = error.response ? 
error.response.status : -1; + throw new LLMWhispererClientException(err.message, err.statusCode); } } } From 67852f21523def6f6b825d3d3d646bd4e9c18e6a Mon Sep 17 00:00:00 2001 From: jagadeeswaran-zipstack Date: Mon, 10 Mar 2025 14:38:53 +0530 Subject: [PATCH 3/4] Version update --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 4a1292c..35da739 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "llmwhisperer-client", - "version": "2.1.0", + "version": "2.1.1", "description": "LLMWhisper JS Client", "main": "index.js", "scripts": { From 16b4645f8c8be770e4b97cd212778fd4ea8796b9 Mon Sep 17 00:00:00 2001 From: jagadeeswaran-zipstack Date: Mon, 10 Mar 2025 14:41:42 +0530 Subject: [PATCH 4/4] Updated package version --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 35da739..08abac5 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "llmwhisperer-client", - "version": "2.1.1", + "version": "2.2.0", "description": "LLMWhisper JS Client", "main": "index.js", "scripts": {