From 5e3a9475dbe5f698f4e811866408e7d79c44d829 Mon Sep 17 00:00:00 2001 From: Jack Qian Date: Thu, 13 Mar 2025 21:39:32 +0800 Subject: [PATCH 01/14] feat: Add script to verify API link paths. --- apps/next/package.json | 3 +- apps/next/scripts/check-api-links.mts | 441 ++++++++++++++++++++++++++ 2 files changed, 443 insertions(+), 1 deletion(-) create mode 100644 apps/next/scripts/check-api-links.mts diff --git a/apps/next/package.json b/apps/next/package.json index 8ecc944b11..e14be88789 100644 --- a/apps/next/package.json +++ b/apps/next/package.json @@ -10,7 +10,8 @@ "start": "next start", "postbuild": "tsx scripts/post-build.mts && tsx scripts/validate-links.mts", "build:docs": "cross-env NODE_OPTIONS=\"--max-old-space-size=8192\" typedoc && tsx scripts/generate-docs.mts", - "validate-links": "tsx scripts/validate-links.mts" + "validate-links": "tsx scripts/validate-links.mts", + "check-api-links": "tsx scripts/check-api-links.mts" }, "dependencies": { "@icons-pack/react-simple-icons": "^10.1.0", diff --git a/apps/next/scripts/check-api-links.mts b/apps/next/scripts/check-api-links.mts new file mode 100644 index 0000000000..aa4ad732f6 --- /dev/null +++ b/apps/next/scripts/check-api-links.mts @@ -0,0 +1,441 @@ +import fs from "fs/promises"; +import path from "path"; +import { fileURLToPath } from "url"; + +// get the directory of the current file +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); + +// Parse command line arguments +interface CommandLineArgs { + verbose: boolean; +} + +function parseArgs(): CommandLineArgs { + const args = process.argv.slice(2); + return { + verbose: args.includes("-v") || args.includes("--verbose"), + }; +} + +// Command line arguments +const args = parseArgs(); + +// define directories to check +const DOCS_DIRS = ["cloud", "llamaindex"]; + +// API docs base path +const API_DOCS_BASE_PATH = path.resolve(__dirname, "../src/content/docs/api"); + +/** + * Interface for API directory structure + */ +interface ApiDirectory { + path: string; // Full path to the directory + relativePath: string; // Path relative to API_DOCS_BASE_PATH + name: string; // Directory name +} + +/** + * Get all API directories by recursively scanning the api directory + */ +async function getApiDirectories(): Promise { + const directories: ApiDirectory[] = []; + + async function scanDirectories(dirPath: string, relativePath: string = "") { + try { + const entries = await fs.readdir(dirPath, { withFileTypes: true }); + + for (const entry of entries) { + // Skip hidden directories and files + if (entry.name.startsWith(".") || entry.name.startsWith("_")) { + continue; + } + + const fullPath = path.join(dirPath, entry.name); + const entryRelativePath = relativePath + ? path.join(relativePath, entry.name) + : entry.name; + + if (entry.isDirectory()) { + // Add this directory + directories.push({ + path: fullPath, + relativePath: entryRelativePath, + name: entry.name, + }); + + // Recursively scan subdirectories + await scanDirectories(fullPath, entryRelativePath); + } + } + } catch (error) { + console.error(`Error scanning directory ${dirPath}:`, error); + } + } + + await scanDirectories(API_DOCS_BASE_PATH); + + // Log found directories + console.log(`Found ${directories.length} API directories:`); + directories.forEach((dir) => { + console.log(`- ${dir.relativePath}`); + }); + + return directories; +} + +/** + * interface for API links + */ +interface ApiLink { + text: string; // original link text + path: string; // full path + apiPath: string; // path within the api directory (e.g., "classes/test/MyClass") + name: string; // API name (e.g. "MyClass") + folder: string; // top-level folder name (e.g. "classes") +} + +/** + * interface for API references + */ +interface ApiReference { + filePath: string; // source MDX file path + links: ApiLink[]; // API links array +} + +/** + * interface for check results + */ +interface CheckResult { + type: string; // API type path (e.g., "classes" or "classes/test") + missing: string[]; // Missing API docs + total: number; // Total references + found: number; // Found docs +} + +/** + * recursively get all MDX files in a directory + */ +async function getMdxFiles(dir: string): Promise { + const files: string[] = []; + + async function scan(directory: string) { + try { + const entries = await fs.readdir(directory, { withFileTypes: true }); + + for (const entry of entries) { + const fullPath = path.join(directory, entry.name); + + if (entry.isDirectory()) { + await scan(fullPath); + } else if (entry.isFile() && entry.name.endsWith(".mdx")) { + files.push(fullPath); + } + } + } catch (error) { + console.error(`Error reading directory ${directory}:`, error); + } + } + + await scan(dir); + return files; +} + +/** + * extract content between "## API Reference" and the next "##" or file end + */ +function extractApiReferenceSection(content: string): string | null { + const apiRefMatch = content.match(/## API Reference\s*([\s\S]*?)(?=\s*##|$)/); + return apiRefMatch ? apiRefMatch[1].trim() : null; +} + +/** + * parse links in markdown list items with format "[name](/docs/api/folder/name)" or "[name](/docs/api/folder/subfolder/name)" + */ +function parseApiLinks(content: string): ApiLink[] { + // Updated pattern to capture the entire path after /docs/api/ + const linkPattern = /- \[(.*?)\]\((\/docs\/api\/[^)]+)\)/g; + const links: ApiLink[] = []; + + let match; + while ((match = linkPattern.exec(content)) !== null) { + const text = match[1]; + const fullPath = match[2]; + + // parse path components from "/docs/api/path/to/name" format + const pathParts = fullPath.split("/"); + if ( + pathParts.length >= 5 && + pathParts[1] === "docs" && + pathParts[2] === "api" + ) { + // Extract the API path (everything after /docs/api/) + const apiPath = pathParts.slice(3).join("/"); + // The name is the last part of the path + const name = pathParts[pathParts.length - 1]; + // The top-level folder is the first part after /docs/api/ + const folder = pathParts[3]; + + links.push({ + text, + path: fullPath, + apiPath, + name, + folder, + }); + } + } + + return links; +} + +/** + * get API references from all MDX files in a document directory + */ +async function getAllApiReferences(docsDir: string): Promise { + const mdxFiles = await getMdxFiles(docsDir); + const references: ApiReference[] = []; + + for (const filePath of mdxFiles) { + try { + const content = await fs.readFile(filePath, "utf-8"); + const apiSection = extractApiReferenceSection(content); + + if (apiSection) { + const links = parseApiLinks(apiSection); + if (links.length > 0) { + references.push({ + filePath: path.relative(docsDir, filePath), + links, + }); + } + } + } catch (error) { + console.error(`Error reading file ${filePath}:`, error); + } + } + + return references; +} + +/** + * check if a file exists + */ +async function fileExists(filepath: string): Promise { + try { + await fs.access(filepath); + return true; + } catch { + return false; + } +} + +/** + * check if API references have corresponding document files + */ +async function checkApiLinks( + references: ApiReference[], + apiDirectories: ApiDirectory[], +): Promise { + // Create a map of all API directories by their relative path + const apiDirMap = new Map(); + apiDirectories.forEach((dir) => { + apiDirMap.set(dir.relativePath, dir); + }); + + // Group links by their API path + const linksByPath = new Map>(); + + // Initialize with all known API directories + apiDirectories.forEach((dir) => { + linksByPath.set(dir.relativePath, new Set()); + }); + + // Collect all links + references.forEach((ref) => { + ref.links.forEach((link) => { + // Get the directory part of the API path (without the name) + const pathParts = link.apiPath.split("/"); + const dirPath = + pathParts.length > 1 ? pathParts.slice(0, -1).join("/") : pathParts[0]; + + if (linksByPath.has(dirPath)) { + linksByPath.get(dirPath)?.add(link.name); + } else { + // Handle links to paths that weren't found in API directory + console.warn( + `Warning: Unknown API path "${dirPath}" referenced in link to ${link.name}`, + ); + // Create a new set for this path + linksByPath.set(dirPath, new Set([link.name])); + } + }); + }); + + // Check results + const results: CheckResult[] = []; + + // Check each path of links + for (const [dirPath, names] of linksByPath.entries()) { + if (names.size === 0) continue; + + const folderPath = path.join(API_DOCS_BASE_PATH, dirPath); + const missing: string[] = []; + let found = 0; + + for (const name of names) { + const docPath = path.join(folderPath, `${name}.mdx`); + const exists = await fileExists(docPath); + + if (!exists) { + missing.push(name); + } else { + found++; + } + } + + results.push({ + type: dirPath, + missing, + total: names.size, + found, + }); + } + + return results; +} + +interface DocResult { + docName: string; + results: CheckResult[]; + references: ApiReference[]; +} + +/** + * find and check API references in a specific document path + */ +async function findAndCheckReferences( + docName: string, + apiDirectories: ApiDirectory[], +): Promise { + const docsDir = path.resolve(__dirname, "../src/content/docs/", docName); + + try { + console.log(`Checking API references in ${docName} directory...`); + const references = await getAllApiReferences(docsDir); + + if (references.length === 0) { + console.log(`No API references found in ${docName} directory`); + return null; + } + + console.log( + `Found ${references.length} files containing API references in ${docName} directory`, + ); + + // check if references have corresponding documents + const results = await checkApiLinks(references, apiDirectories); + return { docName, results, references }; + } catch (error) { + console.error(`Error processing ${docName} directory:`, error); + return null; + } +} + +async function main(): Promise { + console.log("Start checking API links...\n"); + + // Dynamically get API directories by scanning the api directory + const apiDirectories = await getApiDirectories(); + + const allResults: DocResult[] = []; + + // check each document directory + for (const docDir of DOCS_DIRS) { + const result = await findAndCheckReferences(docDir, apiDirectories); + if (result) { + allResults.push(result); + } + } + + // log results + console.log("\n===== API links check results =====\n"); + + let hasMissing = false; + + for (const { docName, results, references } of allResults) { + console.log(`\n${docName} directory:`); + + // log referenced files with or without detailed link information based on verbose flag + console.log(`\nReferenced files${args.verbose ? " and links" : ""}:`); + references.forEach((ref) => { + console.log( + `\n- ${ref.filePath} (${ref.links.length} links)${args.verbose ? ":" : ""}`, + ); + + // Only show detailed links in verbose mode + if (args.verbose) { + // Group links by API path for better readability + const linksByPath = new Map(); + + ref.links.forEach((link) => { + const pathParts = link.apiPath.split("/"); + const dirPath = + pathParts.length > 1 + ? pathParts.slice(0, -1).join("/") + : pathParts[0]; + + if (!linksByPath.has(dirPath)) { + linksByPath.set(dirPath, []); + } + linksByPath.get(dirPath)?.push(link); + }); + + // Display links grouped by directory + for (const [dirPath, links] of linksByPath.entries()) { + console.log(` ${dirPath}/`); + links.forEach((link) => { + console.log(` - ${link.text} (${link.name})`); + }); + } + } + }); + + // log check results + console.log(`\nCheck results:`); + for (const result of results) { + console.log(`\n${result.type}:`); + console.log(`- total: ${result.total}`); + console.log(`- found: ${result.found}`); + console.log(`- missing: ${result.missing.length}`); + + if (result.missing.length > 0) { + hasMissing = true; + // list missing docs + console.log("\nMissing docs:"); + result.missing.forEach((name) => console.log(` - ${name}`)); + } + } + + console.log("\n---"); + } + + // Summary + console.log("\n===== Summary ====="); + if (hasMissing) { + // log missing message + console.log( + "Missing docs, please check the list above and create the corresponding document files.", + ); + process.exit(1); + } else { + // log success message + console.log("All API links have corresponding document files."); + } +} + +main().catch((error) => { + console.error("Error:", error); + process.exit(1); +}); From ae0e51d3a0ac75fbc185f4fe1d64250bd46ec024 Mon Sep 17 00:00:00 2001 From: Jack Qian Date: Thu, 13 Mar 2025 21:43:49 +0800 Subject: [PATCH 02/14] fix: entry path mismatch causing incomplete docs --- apps/next/typedoc.json | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/apps/next/typedoc.json b/apps/next/typedoc.json index a545e44cd6..c12ac136cb 100644 --- a/apps/next/typedoc.json +++ b/apps/next/typedoc.json @@ -1,6 +1,9 @@ { "plugin": ["typedoc-plugin-markdown", "typedoc-plugin-merge-modules"], - "entryPoints": ["../../packages/**/src/index.ts"], + "entryPoints": [ + "../../packages/{,**/}index.ts", + "../../packages/{readers,cloud}/src/*.ts" + ], "exclude": [ "../../packages/autotool/**/src/index.ts", "**/node_modules/**", From a1ce5dacb5022834a1ea118ea2435e43e5b7be9d Mon Sep 17 00:00:00 2001 From: Jack Qian Date: Thu, 13 Mar 2025 21:45:38 +0800 Subject: [PATCH 03/14] fix: expose class to enable documentation generation --- packages/core/src/response-synthesizers/factory.ts | 6 +++--- packages/core/src/response-synthesizers/index.ts | 8 +++++++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/packages/core/src/response-synthesizers/factory.ts b/packages/core/src/response-synthesizers/factory.ts index 0e6ae17537..f0d17bd1d0 100644 --- a/packages/core/src/response-synthesizers/factory.ts +++ b/packages/core/src/response-synthesizers/factory.ts @@ -35,7 +35,7 @@ export type ResponseMode = z.infer; /** * A response builder that uses the query to ask the LLM generate a better response using multiple text chunks. */ -class Refine extends BaseSynthesizer { +export class Refine extends BaseSynthesizer { textQATemplate: TextQAPrompt; refineTemplate: RefinePrompt; @@ -213,7 +213,7 @@ class Refine extends BaseSynthesizer { /** * CompactAndRefine is a slight variation of Refine that first compacts the text chunks into the smallest possible number of chunks. */ -class CompactAndRefine extends Refine { +export class CompactAndRefine extends Refine { async getResponse( query: MessageContent, nodes: NodeWithScore[], @@ -267,7 +267,7 @@ class CompactAndRefine extends Refine { /** * TreeSummarize repacks the text chunks into the smallest possible number of chunks and then summarizes them, then recursively does so until there's one chunk left. */ -class TreeSummarize extends BaseSynthesizer { +export class TreeSummarize extends BaseSynthesizer { summaryTemplate: TreeSummarizePrompt; constructor( diff --git a/packages/core/src/response-synthesizers/index.ts b/packages/core/src/response-synthesizers/index.ts index a782d514f1..8c524f0743 100644 --- a/packages/core/src/response-synthesizers/index.ts +++ b/packages/core/src/response-synthesizers/index.ts @@ -2,7 +2,13 @@ export { BaseSynthesizer, type BaseSynthesizerOptions, } from "./base-synthesizer"; -export { getResponseSynthesizer, type ResponseMode } from "./factory"; +export { + CompactAndRefine, + Refine, + TreeSummarize, + getResponseSynthesizer, + type ResponseMode, +} from "./factory"; export type { SynthesizeEndEvent, SynthesizeQuery, From dfbed2e12a6a330f71162cf1eb1c387ae101d1cd Mon Sep 17 00:00:00 2001 From: Jack Qian Date: Thu, 13 Mar 2025 21:46:33 +0800 Subject: [PATCH 04/14] fix: incorrect documentation link paths --- .../llamaindex/modules/data_stores/chat_stores/index.mdx | 2 +- .../docs/llamaindex/modules/evaluation/correctness.mdx | 2 +- .../content/docs/llamaindex/modules/evaluation/index.mdx | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/apps/next/src/content/docs/llamaindex/modules/data_stores/chat_stores/index.mdx b/apps/next/src/content/docs/llamaindex/modules/data_stores/chat_stores/index.mdx index cadcc2ad41..4fe5e8f3eb 100644 --- a/apps/next/src/content/docs/llamaindex/modules/data_stores/chat_stores/index.mdx +++ b/apps/next/src/content/docs/llamaindex/modules/data_stores/chat_stores/index.mdx @@ -12,5 +12,5 @@ Check the [LlamaIndexTS Github](https://github.com/run-llama/LlamaIndexTS) for t ## API Reference -- [BaseChatStore](/docs/api/interfaces/BaseChatStore) +- [BaseChatStore](/docs/api/classes/BaseChatStore) diff --git a/apps/next/src/content/docs/llamaindex/modules/evaluation/correctness.mdx b/apps/next/src/content/docs/llamaindex/modules/evaluation/correctness.mdx index 50cb3c856a..c1189dfddb 100644 --- a/apps/next/src/content/docs/llamaindex/modules/evaluation/correctness.mdx +++ b/apps/next/src/content/docs/llamaindex/modules/evaluation/correctness.mdx @@ -74,4 +74,4 @@ the response is not correct with a score of 2.5 ## API Reference -- [CorrectnessEvaluator](/docs/api/classes/CorrectnessEvaluator) +- [CorrectnessEvaluator](/docs/api/classes/CorrectnessEvaluator) \ No newline at end of file diff --git a/apps/next/src/content/docs/llamaindex/modules/evaluation/index.mdx b/apps/next/src/content/docs/llamaindex/modules/evaluation/index.mdx index e6352809d8..b5d32e8d49 100644 --- a/apps/next/src/content/docs/llamaindex/modules/evaluation/index.mdx +++ b/apps/next/src/content/docs/llamaindex/modules/evaluation/index.mdx @@ -29,6 +29,6 @@ These evaluation modules are in the following forms: ## Usage -- [Correctness Evaluator](/docs/llamaindex/modules/evaluation/correctness) -- [Faithfulness Evaluator](/docs/llamaindex/modules/evaluation/faithfulness) -- [Relevancy Evaluator](/docs/llamaindex/modules/evaluation/relevancy) +- [Correctness Evaluator](/docs/llamaindex/modules/evaluation/modules/correctness) +- [Faithfulness Evaluator](/docs/llamaindex/modules/evaluation/modules/faithfulness) +- [Relevancy Evaluator](/docs/llamaindex/modules/evaluation/modules/relevancy) From 33e50755e6f0feb565be6a207212c08f25946b33 Mon Sep 17 00:00:00 2001 From: Jack Qian Date: Thu, 13 Mar 2025 23:11:03 +0800 Subject: [PATCH 05/14] fix: expose class to enable documentation generation --- packages/core/src/response-synthesizers/factory.ts | 4 ++-- packages/core/src/response-synthesizers/index.ts | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/packages/core/src/response-synthesizers/factory.ts b/packages/core/src/response-synthesizers/factory.ts index f0d17bd1d0..7f9af5f4ea 100644 --- a/packages/core/src/response-synthesizers/factory.ts +++ b/packages/core/src/response-synthesizers/factory.ts @@ -23,7 +23,7 @@ import { } from "./base-synthesizer"; import { createMessageContent } from "./utils"; -const responseModeSchema = z.enum([ +export const responseModeSchema = z.enum([ "refine", "compact", "tree_summarize", @@ -370,7 +370,7 @@ export class TreeSummarize extends BaseSynthesizer { } } -class MultiModal extends BaseSynthesizer { +export class MultiModal extends BaseSynthesizer { metadataMode: MetadataMode; textQATemplate: TextQAPrompt; diff --git a/packages/core/src/response-synthesizers/index.ts b/packages/core/src/response-synthesizers/index.ts index 8c524f0743..907958b237 100644 --- a/packages/core/src/response-synthesizers/index.ts +++ b/packages/core/src/response-synthesizers/index.ts @@ -4,9 +4,11 @@ export { } from "./base-synthesizer"; export { CompactAndRefine, + MultiModal, Refine, TreeSummarize, getResponseSynthesizer, + responseModeSchema, type ResponseMode, } from "./factory"; export type { From f6ad5193e1e5e668308114422428f497d70bc6c5 Mon Sep 17 00:00:00 2001 From: Jack Qian Date: Thu, 13 Mar 2025 23:12:45 +0800 Subject: [PATCH 06/14] docs: update Response Synthesizer relations docs --- .../docs/llamaindex/modules/prompt/index.mdx | 11 +++++------ .../modules/response_synthesizer.mdx | 19 +++++++++++-------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/apps/next/src/content/docs/llamaindex/modules/prompt/index.mdx b/apps/next/src/content/docs/llamaindex/modules/prompt/index.mdx index f26d0dd3cc..af76d1e8a7 100644 --- a/apps/next/src/content/docs/llamaindex/modules/prompt/index.mdx +++ b/apps/next/src/content/docs/llamaindex/modules/prompt/index.mdx @@ -28,13 +28,12 @@ Answer:`; ### 1. Customizing the default prompt on initialization -The first method is to create a new instance of `ResponseSynthesizer` (or the module you would like to update the prompt) and pass the custom prompt to the `responseBuilder` parameter. Then, pass the instance to the `asQueryEngine` method of the index. +The first method is to create a new instance of `Response Synthesizer` (or the module you would like to update the prompt) and pass the custom prompt to the `responseBuilder` parameter. Then, pass the instance to the `asQueryEngine` method of the index. ```ts -// Create an instance of response synthesizer -const responseSynthesizer = new ResponseSynthesizer({ - responseBuilder: new CompactAndRefine(undefined, newTextQaPrompt), -}); +// Create an instance of Response Synthesizer + +const responseSynthesizer = getResponseSynthesizer('compact') // Create index const index = await VectorStoreIndex.fromDocuments([document]); @@ -75,5 +74,5 @@ const response = await queryEngine.query({ ## API Reference -- [ResponseSynthesizer](/docs/api/classes/ResponseSynthesizer) +- [Response Synthesizer](/docs/llamaindex/modules/response_synthesizer) - [CompactAndRefine](/docs/api/classes/CompactAndRefine) diff --git a/apps/next/src/content/docs/llamaindex/modules/response_synthesizer.mdx b/apps/next/src/content/docs/llamaindex/modules/response_synthesizer.mdx index bda0d53bfd..47e0492c52 100644 --- a/apps/next/src/content/docs/llamaindex/modules/response_synthesizer.mdx +++ b/apps/next/src/content/docs/llamaindex/modules/response_synthesizer.mdx @@ -12,15 +12,17 @@ The ResponseSynthesizer is responsible for sending the query, nodes, and prompt multiple compact prompts. The same as `refine`, but should result in less LLM calls. - `TreeSummarize`: Given a set of text chunks and the query, recursively construct a tree and return the root node as the response. Good for summarization purposes. -- `SimpleResponseBuilder`: Given a set of text chunks and the query, apply the query to each text - chunk while accumulating the responses into an array. Returns a concatenated string of all - responses. Good for when you need to run the same query separately against each text - chunk. +- `MultiModal`: Combines textual inputs with additional modality-specific metadata to generate an integrated response. + It leverages a text QA template to build a prompt that incorporates various input types and produces either streaming or complete responses. + This approach is ideal for use cases where enriching the answer with multi-modal context (such as images, audio, or other data) + can enhance the output quality. ```typescript -import { NodeWithScore, TextNode, ResponseSynthesizer } from "llamaindex"; +import { NodeWithScore, TextNode, getResponseSynthesizer, responseModeSchema } from "llamaindex"; -const responseSynthesizer = new ResponseSynthesizer(); +// you can also use responseModeSchema.Enum.refine, responseModeSchema.Enum.tree_summarize, responseModeSchema.Enum.multi_modal +// or you can use the CompactAndRefine, Refine, TreeSummarize, or MultiModal classes directly +const responseSynthesizer = getResponseSynthesizer(responseModeSchema.Enum.compact); const nodesWithScore: NodeWithScore[] = [ { @@ -55,8 +57,9 @@ for await (const chunk of stream) { ## API Reference -- [ResponseSynthesizer](/docs/api/classes/ResponseSynthesizer) +- [getResponseSynthesizer](/docs/api/functions/getResponseSynthesizer) +- [responseModeSchema](/docs/api/variables/responseModeSchema) - [Refine](/docs/api/classes/Refine) - [CompactAndRefine](/docs/api/classes/CompactAndRefine) - [TreeSummarize](/docs/api/classes/TreeSummarize) -- [SimpleResponseBuilder](/docs/api/classes/SimpleResponseBuilder) +- [MultiModal](/docs/api/classes/MultiModal) From eae36025526b9d33822120c19fe406fb0a8a5667 Mon Sep 17 00:00:00 2001 From: Jack Qian Date: Thu, 13 Mar 2025 23:25:20 +0800 Subject: [PATCH 07/14] docs: updated Usage and Description --- .../content/docs/llamaindex/modules/prompt/index.mdx | 12 ++++++++++-- .../docs/llamaindex/modules/response_synthesizer.mdx | 2 +- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/apps/next/src/content/docs/llamaindex/modules/prompt/index.mdx b/apps/next/src/content/docs/llamaindex/modules/prompt/index.mdx index af76d1e8a7..d53ff387cf 100644 --- a/apps/next/src/content/docs/llamaindex/modules/prompt/index.mdx +++ b/apps/next/src/content/docs/llamaindex/modules/prompt/index.mdx @@ -28,12 +28,20 @@ Answer:`; ### 1. Customizing the default prompt on initialization -The first method is to create a new instance of `Response Synthesizer` (or the module you would like to update the prompt) and pass the custom prompt to the `responseBuilder` parameter. Then, pass the instance to the `asQueryEngine` method of the index. +The first method is to create a new instance of a Response Synthesizer (or the module you would like to update the prompt) by using the getResponseSynthesizer function. Instead of passing the custom prompt to the deprecated responseBuilder parameter, call getResponseSynthesizer with the mode as the first argument and supply the new prompt via the options parameter. ```ts // Create an instance of Response Synthesizer -const responseSynthesizer = getResponseSynthesizer('compact') +// Deprecated usage: +const responseSynthesizer = new ResponseSynthesizer({ + responseBuilder: new CompactAndRefine(undefined, newTextQaPrompt), +}); + +// Current usage: +const responseSynthesizer = getResponseSynthesizer('compact', { + textQATemplate: newTextQaPrompt +}) // Create index const index = await VectorStoreIndex.fromDocuments([document]); diff --git a/apps/next/src/content/docs/llamaindex/modules/response_synthesizer.mdx b/apps/next/src/content/docs/llamaindex/modules/response_synthesizer.mdx index 47e0492c52..8e94f5bd7f 100644 --- a/apps/next/src/content/docs/llamaindex/modules/response_synthesizer.mdx +++ b/apps/next/src/content/docs/llamaindex/modules/response_synthesizer.mdx @@ -1,5 +1,5 @@ --- -title: ResponseSynthesizer +title: Response Synthesizer --- The ResponseSynthesizer is responsible for sending the query, nodes, and prompt templates to the LLM to generate a response. There are a few key modes for generating a response: From b6f0bf4579020a66f049c8da5ef7b9c48cddd85c Mon Sep 17 00:00:00 2001 From: Jack Qian Date: Fri, 14 Mar 2025 11:52:23 +0800 Subject: [PATCH 08/14] chore: remove papaparse dependency --- packages/readers/package.json | 1 - pnpm-lock.yaml | 8 -------- 2 files changed, 9 deletions(-) diff --git a/packages/readers/package.json b/packages/readers/package.json index 9a7d6338bc..80953af9c9 100644 --- a/packages/readers/package.json +++ b/packages/readers/package.json @@ -230,7 +230,6 @@ "mammoth": "^1.7.2", "mongodb": "^6.7.0", "notion-md-crawler": "^1.0.0", - "papaparse": "^5.4.1", "unpdf": "^0.12.1" } } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index c828d90634..a5ff7b477b 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -1695,9 +1695,6 @@ importers: notion-md-crawler: specifier: ^1.0.0 version: 1.0.1 - papaparse: - specifier: ^5.4.1 - version: 5.5.2 unpdf: specifier: ^0.12.1 version: 0.12.1 @@ -9710,9 +9707,6 @@ packages: pako@1.0.11: resolution: {integrity: sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==} - papaparse@5.5.2: - resolution: {integrity: sha512-PZXg8UuAc4PcVwLosEEDYjPyfWnTEhOrUfdv+3Bx+NuAb+5NhDmXzg5fHWmdCh1mP5p7JAZfFr3IMQfcntNAdA==} - parent-module@1.0.1: resolution: {integrity: sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==} engines: {node: '>=6'} @@ -22183,8 +22177,6 @@ snapshots: pako@1.0.11: {} - papaparse@5.5.2: {} - parent-module@1.0.1: dependencies: callsites: 3.1.0 From 7ce55082e825deb64a2f431ca27d7e98b0cb5c99 Mon Sep 17 00:00:00 2001 From: Jack Qian Date: Fri, 14 Mar 2025 12:00:36 +0800 Subject: [PATCH 09/14] chore: addon changeset --- .changeset/calm-eggs-type.md | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 .changeset/calm-eggs-type.md diff --git a/.changeset/calm-eggs-type.md b/.changeset/calm-eggs-type.md new file mode 100644 index 0000000000..4ac61d8da3 --- /dev/null +++ b/.changeset/calm-eggs-type.md @@ -0,0 +1,7 @@ +--- +"@llamaindex/readers": patch +"@llamaindex/core": patch +"@llamaindex/doc": patch +--- + +Expose more content to fix the issue with unavailable documentation links, and adjust the documentation based on the latest code. From 1d9dabdd93c6f9e950adfbcb7624a3f217fcec0b Mon Sep 17 00:00:00 2001 From: Jack Qian Date: Fri, 14 Mar 2025 13:50:11 +0800 Subject: [PATCH 10/14] chore: Revert "evaluation doc update" --- .../content/docs/llamaindex/modules/evaluation/index.mdx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/next/src/content/docs/llamaindex/modules/evaluation/index.mdx b/apps/next/src/content/docs/llamaindex/modules/evaluation/index.mdx index b5d32e8d49..e6352809d8 100644 --- a/apps/next/src/content/docs/llamaindex/modules/evaluation/index.mdx +++ b/apps/next/src/content/docs/llamaindex/modules/evaluation/index.mdx @@ -29,6 +29,6 @@ These evaluation modules are in the following forms: ## Usage -- [Correctness Evaluator](/docs/llamaindex/modules/evaluation/modules/correctness) -- [Faithfulness Evaluator](/docs/llamaindex/modules/evaluation/modules/faithfulness) -- [Relevancy Evaluator](/docs/llamaindex/modules/evaluation/modules/relevancy) +- [Correctness Evaluator](/docs/llamaindex/modules/evaluation/correctness) +- [Faithfulness Evaluator](/docs/llamaindex/modules/evaluation/faithfulness) +- [Relevancy Evaluator](/docs/llamaindex/modules/evaluation/relevancy) From 68c44edd430e3850b462c8b601172248524dd219 Mon Sep 17 00:00:00 2001 From: Jack Qian Date: Fri, 14 Mar 2025 17:11:26 +0800 Subject: [PATCH 11/14] chore: Revert "Add script to verify API link paths" --- apps/next/package.json | 3 +- apps/next/scripts/check-api-links.mts | 441 -------------------------- 2 files changed, 1 insertion(+), 443 deletions(-) delete mode 100644 apps/next/scripts/check-api-links.mts diff --git a/apps/next/package.json b/apps/next/package.json index e14be88789..8ecc944b11 100644 --- a/apps/next/package.json +++ b/apps/next/package.json @@ -10,8 +10,7 @@ "start": "next start", "postbuild": "tsx scripts/post-build.mts && tsx scripts/validate-links.mts", "build:docs": "cross-env NODE_OPTIONS=\"--max-old-space-size=8192\" typedoc && tsx scripts/generate-docs.mts", - "validate-links": "tsx scripts/validate-links.mts", - "check-api-links": "tsx scripts/check-api-links.mts" + "validate-links": "tsx scripts/validate-links.mts" }, "dependencies": { "@icons-pack/react-simple-icons": "^10.1.0", diff --git a/apps/next/scripts/check-api-links.mts b/apps/next/scripts/check-api-links.mts deleted file mode 100644 index aa4ad732f6..0000000000 --- a/apps/next/scripts/check-api-links.mts +++ /dev/null @@ -1,441 +0,0 @@ -import fs from "fs/promises"; -import path from "path"; -import { fileURLToPath } from "url"; - -// get the directory of the current file -const __filename = fileURLToPath(import.meta.url); -const __dirname = path.dirname(__filename); - -// Parse command line arguments -interface CommandLineArgs { - verbose: boolean; -} - -function parseArgs(): CommandLineArgs { - const args = process.argv.slice(2); - return { - verbose: args.includes("-v") || args.includes("--verbose"), - }; -} - -// Command line arguments -const args = parseArgs(); - -// define directories to check -const DOCS_DIRS = ["cloud", "llamaindex"]; - -// API docs base path -const API_DOCS_BASE_PATH = path.resolve(__dirname, "../src/content/docs/api"); - -/** - * Interface for API directory structure - */ -interface ApiDirectory { - path: string; // Full path to the directory - relativePath: string; // Path relative to API_DOCS_BASE_PATH - name: string; // Directory name -} - -/** - * Get all API directories by recursively scanning the api directory - */ -async function getApiDirectories(): Promise { - const directories: ApiDirectory[] = []; - - async function scanDirectories(dirPath: string, relativePath: string = "") { - try { - const entries = await fs.readdir(dirPath, { withFileTypes: true }); - - for (const entry of entries) { - // Skip hidden directories and files - if (entry.name.startsWith(".") || entry.name.startsWith("_")) { - continue; - } - - const fullPath = path.join(dirPath, entry.name); - const entryRelativePath = relativePath - ? path.join(relativePath, entry.name) - : entry.name; - - if (entry.isDirectory()) { - // Add this directory - directories.push({ - path: fullPath, - relativePath: entryRelativePath, - name: entry.name, - }); - - // Recursively scan subdirectories - await scanDirectories(fullPath, entryRelativePath); - } - } - } catch (error) { - console.error(`Error scanning directory ${dirPath}:`, error); - } - } - - await scanDirectories(API_DOCS_BASE_PATH); - - // Log found directories - console.log(`Found ${directories.length} API directories:`); - directories.forEach((dir) => { - console.log(`- ${dir.relativePath}`); - }); - - return directories; -} - -/** - * interface for API links - */ -interface ApiLink { - text: string; // original link text - path: string; // full path - apiPath: string; // path within the api directory (e.g., "classes/test/MyClass") - name: string; // API name (e.g. "MyClass") - folder: string; // top-level folder name (e.g. "classes") -} - -/** - * interface for API references - */ -interface ApiReference { - filePath: string; // source MDX file path - links: ApiLink[]; // API links array -} - -/** - * interface for check results - */ -interface CheckResult { - type: string; // API type path (e.g., "classes" or "classes/test") - missing: string[]; // Missing API docs - total: number; // Total references - found: number; // Found docs -} - -/** - * recursively get all MDX files in a directory - */ -async function getMdxFiles(dir: string): Promise { - const files: string[] = []; - - async function scan(directory: string) { - try { - const entries = await fs.readdir(directory, { withFileTypes: true }); - - for (const entry of entries) { - const fullPath = path.join(directory, entry.name); - - if (entry.isDirectory()) { - await scan(fullPath); - } else if (entry.isFile() && entry.name.endsWith(".mdx")) { - files.push(fullPath); - } - } - } catch (error) { - console.error(`Error reading directory ${directory}:`, error); - } - } - - await scan(dir); - return files; -} - -/** - * extract content between "## API Reference" and the next "##" or file end - */ -function extractApiReferenceSection(content: string): string | null { - const apiRefMatch = content.match(/## API Reference\s*([\s\S]*?)(?=\s*##|$)/); - return apiRefMatch ? apiRefMatch[1].trim() : null; -} - -/** - * parse links in markdown list items with format "[name](/docs/api/folder/name)" or "[name](/docs/api/folder/subfolder/name)" - */ -function parseApiLinks(content: string): ApiLink[] { - // Updated pattern to capture the entire path after /docs/api/ - const linkPattern = /- \[(.*?)\]\((\/docs\/api\/[^)]+)\)/g; - const links: ApiLink[] = []; - - let match; - while ((match = linkPattern.exec(content)) !== null) { - const text = match[1]; - const fullPath = match[2]; - - // parse path components from "/docs/api/path/to/name" format - const pathParts = fullPath.split("/"); - if ( - pathParts.length >= 5 && - pathParts[1] === "docs" && - pathParts[2] === "api" - ) { - // Extract the API path (everything after /docs/api/) - const apiPath = pathParts.slice(3).join("/"); - // The name is the last part of the path - const name = pathParts[pathParts.length - 1]; - // The top-level folder is the first part after /docs/api/ - const folder = pathParts[3]; - - links.push({ - text, - path: fullPath, - apiPath, - name, - folder, - }); - } - } - - return links; -} - -/** - * get API references from all MDX files in a document directory - */ -async function getAllApiReferences(docsDir: string): Promise { - const mdxFiles = await getMdxFiles(docsDir); - const references: ApiReference[] = []; - - for (const filePath of mdxFiles) { - try { - const content = await fs.readFile(filePath, "utf-8"); - const apiSection = extractApiReferenceSection(content); - - if (apiSection) { - const links = parseApiLinks(apiSection); - if (links.length > 0) { - references.push({ - filePath: path.relative(docsDir, filePath), - links, - }); - } - } - } catch (error) { - console.error(`Error reading file ${filePath}:`, error); - } - } - - return references; -} - -/** - * check if a file exists - */ -async function fileExists(filepath: string): Promise { - try { - await fs.access(filepath); - return true; - } catch { - return false; - } -} - -/** - * check if API references have corresponding document files - */ -async function checkApiLinks( - references: ApiReference[], - apiDirectories: ApiDirectory[], -): Promise { - // Create a map of all API directories by their relative path - const apiDirMap = new Map(); - apiDirectories.forEach((dir) => { - apiDirMap.set(dir.relativePath, dir); - }); - - // Group links by their API path - const linksByPath = new Map>(); - - // Initialize with all known API directories - apiDirectories.forEach((dir) => { - linksByPath.set(dir.relativePath, new Set()); - }); - - // Collect all links - references.forEach((ref) => { - ref.links.forEach((link) => { - // Get the directory part of the API path (without the name) - const pathParts = link.apiPath.split("/"); - const dirPath = - pathParts.length > 1 ? pathParts.slice(0, -1).join("/") : pathParts[0]; - - if (linksByPath.has(dirPath)) { - linksByPath.get(dirPath)?.add(link.name); - } else { - // Handle links to paths that weren't found in API directory - console.warn( - `Warning: Unknown API path "${dirPath}" referenced in link to ${link.name}`, - ); - // Create a new set for this path - linksByPath.set(dirPath, new Set([link.name])); - } - }); - }); - - // Check results - const results: CheckResult[] = []; - - // Check each path of links - for (const [dirPath, names] of linksByPath.entries()) { - if (names.size === 0) continue; - - const folderPath = path.join(API_DOCS_BASE_PATH, dirPath); - const missing: string[] = []; - let found = 0; - - for (const name of names) { - const docPath = path.join(folderPath, `${name}.mdx`); - const exists = await fileExists(docPath); - - if (!exists) { - missing.push(name); - } else { - found++; - } - } - - results.push({ - type: dirPath, - missing, - total: names.size, - found, - }); - } - - return results; -} - -interface DocResult { - docName: string; - results: CheckResult[]; - references: ApiReference[]; -} - -/** - * find and check API references in a specific document path - */ -async function findAndCheckReferences( - docName: string, - apiDirectories: ApiDirectory[], -): Promise { - const docsDir = path.resolve(__dirname, "../src/content/docs/", docName); - - try { - console.log(`Checking API references in ${docName} directory...`); - const references = await getAllApiReferences(docsDir); - - if (references.length === 0) { - console.log(`No API references found in ${docName} directory`); - return null; - } - - console.log( - `Found ${references.length} files containing API references in ${docName} directory`, - ); - - // check if references have corresponding documents - const results = await checkApiLinks(references, apiDirectories); - return { docName, results, references }; - } catch (error) { - console.error(`Error processing ${docName} directory:`, error); - return null; - } -} - -async function main(): Promise { - console.log("Start checking API links...\n"); - - // Dynamically get API directories by scanning the api directory - const apiDirectories = await getApiDirectories(); - - const allResults: DocResult[] = []; - - // check each document directory - for (const docDir of DOCS_DIRS) { - const result = await findAndCheckReferences(docDir, apiDirectories); - if (result) { - allResults.push(result); - } - } - - // log results - console.log("\n===== API links check results =====\n"); - - let hasMissing = false; - - for (const { docName, results, references } of allResults) { - console.log(`\n${docName} directory:`); - - // log referenced files with or without detailed link information based on verbose flag - console.log(`\nReferenced files${args.verbose ? " and links" : ""}:`); - references.forEach((ref) => { - console.log( - `\n- ${ref.filePath} (${ref.links.length} links)${args.verbose ? ":" : ""}`, - ); - - // Only show detailed links in verbose mode - if (args.verbose) { - // Group links by API path for better readability - const linksByPath = new Map(); - - ref.links.forEach((link) => { - const pathParts = link.apiPath.split("/"); - const dirPath = - pathParts.length > 1 - ? pathParts.slice(0, -1).join("/") - : pathParts[0]; - - if (!linksByPath.has(dirPath)) { - linksByPath.set(dirPath, []); - } - linksByPath.get(dirPath)?.push(link); - }); - - // Display links grouped by directory - for (const [dirPath, links] of linksByPath.entries()) { - console.log(` ${dirPath}/`); - links.forEach((link) => { - console.log(` - ${link.text} (${link.name})`); - }); - } - } - }); - - // log check results - console.log(`\nCheck results:`); - for (const result of results) { - console.log(`\n${result.type}:`); - console.log(`- total: ${result.total}`); - console.log(`- found: ${result.found}`); - console.log(`- missing: ${result.missing.length}`); - - if (result.missing.length > 0) { - hasMissing = true; - // list missing docs - console.log("\nMissing docs:"); - result.missing.forEach((name) => console.log(` - ${name}`)); - } - } - - console.log("\n---"); - } - - // Summary - console.log("\n===== Summary ====="); - if (hasMissing) { - // log missing message - console.log( - "Missing docs, please check the list above and create the corresponding document files.", - ); - process.exit(1); - } else { - // log success message - console.log("All API links have corresponding document files."); - } -} - -main().catch((error) => { - console.error("Error:", error); - process.exit(1); -}); From 111dd563d05bcf18ad7b08bcf055024f322bdd96 Mon Sep 17 00:00:00 2001 From: Jack Qian Date: Sat, 15 Mar 2025 13:20:14 +0800 Subject: [PATCH 12/14] docs: update data loader docs --- .../src/content/docs/llamaindex/modules/data_loaders/index.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/next/src/content/docs/llamaindex/modules/data_loaders/index.mdx b/apps/next/src/content/docs/llamaindex/modules/data_loaders/index.mdx index 295540506e..188ddba65b 100644 --- a/apps/next/src/content/docs/llamaindex/modules/data_loaders/index.mdx +++ b/apps/next/src/content/docs/llamaindex/modules/data_loaders/index.mdx @@ -35,7 +35,7 @@ Currently, the following readers are mapped to specific file types: - [TextFileReader](/docs/api/classes/TextFileReader): `.txt` - [PDFReader](/docs/api/classes/PDFReader): `.pdf` -- [PapaCSVReader](/docs/api/classes/PapaCSVReader): `.csv` +- [CSVReader](/docs/api/classes/CSVReader): `.csv` - [MarkdownReader](/docs/api/classes/MarkdownReader): `.md` - [DocxReader](/docs/api/classes/DocxReader): `.docx` - [HTMLReader](/docs/api/classes/HTMLReader): `.htm`, `.html` From a5b35b66ac37ec9f615431d28f6dff99a676df09 Mon Sep 17 00:00:00 2001 From: Jack Qian Date: Sat, 15 Mar 2025 14:10:58 +0800 Subject: [PATCH 13/14] feat: enable validate-links & support 2 hash formats Update link normalization logic to remove any trailing slash after stripping query parameters and hash fragments. This ensures that both "api/interfaces/MetadataFilter#operator" and "api/interfaces/MetadataFilter/#operator" are correctly normalized. --- apps/next/scripts/validate-links.mts | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/apps/next/scripts/validate-links.mts b/apps/next/scripts/validate-links.mts index cafaee980f..86a1393fe9 100644 --- a/apps/next/scripts/validate-links.mts +++ b/apps/next/scripts/validate-links.mts @@ -162,7 +162,12 @@ async function validateLinks(): Promise { const invalidLinks = links.filter(({ link }) => { // Check if the link exists in valid routes // First normalize the link (remove any query string or hash) - const normalizedLink = link.split("#")[0].split("?")[0]; + const baseLink = link.split("?")[0].split("#")[0]; + // Remove the trailing slash if present. + // This works with links like "api/interfaces/MetadataFilter#operator" and "api/interfaces/MetadataFilter/#operator". + const normalizedLink = baseLink.endsWith("/") + ? baseLink.slice(0, -1) + : baseLink; // Remove llamaindex/ prefix if it exists as it's the root of the docs let routePath = normalizedLink; @@ -192,8 +197,7 @@ async function main() { try { // Check for invalid internal links - const validationResults: LinkValidationResult[] = []; - await validateLinks(); + const validationResults: LinkValidationResult[] = await validateLinks(); // Check for relative links const relativeLinksResults = await findRelativeLinks(); From f67eb9ab03febd2be9b08c3218a104f170bb2e15 Mon Sep 17 00:00:00 2001 From: Jack Qian Date: Sun, 16 Mar 2025 14:00:58 +0800 Subject: [PATCH 14/14] fix: next build OOM --- apps/next/typedoc.json | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/apps/next/typedoc.json b/apps/next/typedoc.json index c12ac136cb..5b79b46f71 100644 --- a/apps/next/typedoc.json +++ b/apps/next/typedoc.json @@ -2,10 +2,12 @@ "plugin": ["typedoc-plugin-markdown", "typedoc-plugin-merge-modules"], "entryPoints": [ "../../packages/{,**/}index.ts", - "../../packages/{readers,cloud}/src/*.ts" + "../../packages/readers/src/*.ts", + "../../packages/cloud/src/{reader,utils}.ts" ], "exclude": [ "../../packages/autotool/**/src/index.ts", + "../../packages/cloud/src/client/index.ts", "**/node_modules/**", "**/dist/**", "**/test/**",